From 793eb3eb123664017100dfb1a5407f0da639a339 Mon Sep 17 00:00:00 2001 From: Patrick McCarty Date: Fri, 12 Apr 2013 10:40:25 -0700 Subject: [PATCH] Imported Upstream version 5.8 --- .indent.pro | 31 + COPYING | 340 + EXTENDING.html | 386 + FAQ | 371 + INSTALL | 215 + INSTALL.oth | 73 + MAINTAINERS | 88 + Makefile.in | 222 + NEWS | 871 +++ README | 73 + ant.c | 42 + argproc.c | 505 ++ args.c | 274 + args.h | 63 + asm.c | 387 + asp.c | 328 + awk.c | 81 + basic.c | 203 + beta.c | 321 + c.c | 2932 ++++++++ cobol.c | 50 + config.h.in | 277 + configure | 7704 ++++++++++++++++++++ configure.ac | 532 ++ ctags.1 | 1186 +++ ctags.h | 28 + ctags.html | 2087 ++++++ ctags.spec | 40 + debug.c | 113 + debug.h | 70 + descrip.mms | 68 + dosbatch.c | 42 + e_amiga.h | 24 + e_djgpp.h | 47 + e_mac.h | 143 + e_msoft.h | 76 + e_os2.h | 37 + e_qdos.h | 34 + e_riscos.h | 58 + e_vms.h | 31 + eiffel.c | 1352 ++++ entry.c | 847 +++ entry.h | 103 + erlang.c | 189 + flex.c | 2243 ++++++ fortran.c | 2197 ++++++ general.h | 127 + get.c | 669 ++ get.h | 50 + gnu_regex/.svn/all-wcprops | 47 + gnu_regex/.svn/entries | 112 + gnu_regex/.svn/text-base/README.txt.svn-base | 5 + gnu_regex/.svn/text-base/regcomp.c.svn-base | 3818 ++++++++++ gnu_regex/.svn/text-base/regex.c.svn-base | 74 + gnu_regex/.svn/text-base/regex.h.svn-base | 575 ++ gnu_regex/.svn/text-base/regex_internal.c.svn-base | 1713 +++++ gnu_regex/.svn/text-base/regex_internal.h.svn-base | 773 ++ gnu_regex/.svn/text-base/regexec.c.svn-base | 4338 +++++++++++ gnu_regex/README.txt | 5 + gnu_regex/regcomp.c | 3818 ++++++++++ gnu_regex/regex.c | 74 + gnu_regex/regex.h | 575 ++ gnu_regex/regex_internal.c | 1713 +++++ gnu_regex/regex_internal.h | 773 ++ gnu_regex/regexec.c | 4338 +++++++++++ html.c | 49 + jscript.c | 1572 ++++ keyword.c | 258 + keyword.h | 34 + lisp.c | 139 + lregex.c | 704 ++ lua.c | 133 + mac.c | 273 + magic.diff | 21 + main.c | 579 ++ main.h | 32 + maintainer.mak | 476 ++ make.c | 217 + matlab.c | 44 + mk_bc3.mak | 46 + 
mk_bc5.mak | 49 + mk_djg.mak | 18 + mk_manx.mak | 65 + mk_mingw.mak | 31 + mk_mpw.mak | 130 + mk_mvc.mak | 40 + mk_os2.mak | 104 + mk_qdos.mak | 100 + mk_sas.mak | 63 + mkinstalldirs | 40 + ocaml.c | 1842 +++++ options.c | 1829 +++++ options.h | 154 + parse.c | 677 ++ parse.h | 129 + parsers.h | 63 + pascal.c | 267 + perl.c | 382 + php.c | 237 + python.c | 771 ++ qdos.c | 106 + read.c | 564 ++ read.h | 116 + readtags.c | 959 +++ readtags.h | 252 + rexx.c | 39 + routines.c | 891 +++ routines.h | 134 + ruby.c | 408 ++ scheme.c | 111 + sh.c | 115 + slang.c | 41 + sml.c | 212 + sort.c | 230 + sort.h | 32 + source.mak | 122 + sql.c | 2112 ++++++ strlist.c | 281 + strlist.h | 54 + tcl.c | 116 + tex.c | 524 ++ verilog.c | 340 + vhdl.c | 835 +++ vim.c | 636 ++ vstring.c | 232 + vstring.h | 85 + yacc.c | 40 + 127 files changed, 72136 insertions(+) create mode 100644 .indent.pro create mode 100644 COPYING create mode 100644 EXTENDING.html create mode 100644 FAQ create mode 100644 INSTALL create mode 100644 INSTALL.oth create mode 100644 MAINTAINERS create mode 100644 Makefile.in create mode 100644 NEWS create mode 100644 README create mode 100644 ant.c create mode 100644 argproc.c create mode 100644 args.c create mode 100644 args.h create mode 100644 asm.c create mode 100644 asp.c create mode 100644 awk.c create mode 100644 basic.c create mode 100644 beta.c create mode 100644 c.c create mode 100644 cobol.c create mode 100644 config.h.in create mode 100755 configure create mode 100644 configure.ac create mode 100644 ctags.1 create mode 100644 ctags.h create mode 100644 ctags.html create mode 100644 ctags.spec create mode 100644 debug.c create mode 100644 debug.h create mode 100644 descrip.mms create mode 100644 dosbatch.c create mode 100644 e_amiga.h create mode 100644 e_djgpp.h create mode 100644 e_mac.h create mode 100644 e_msoft.h create mode 100644 e_os2.h create mode 100644 e_qdos.h create mode 100644 e_riscos.h create mode 100644 e_vms.h create mode 100644 eiffel.c 
create mode 100644 entry.c create mode 100644 entry.h create mode 100644 erlang.c create mode 100644 flex.c create mode 100644 fortran.c create mode 100644 general.h create mode 100644 get.c create mode 100644 get.h create mode 100644 gnu_regex/.svn/all-wcprops create mode 100644 gnu_regex/.svn/entries create mode 100644 gnu_regex/.svn/text-base/README.txt.svn-base create mode 100644 gnu_regex/.svn/text-base/regcomp.c.svn-base create mode 100644 gnu_regex/.svn/text-base/regex.c.svn-base create mode 100644 gnu_regex/.svn/text-base/regex.h.svn-base create mode 100644 gnu_regex/.svn/text-base/regex_internal.c.svn-base create mode 100644 gnu_regex/.svn/text-base/regex_internal.h.svn-base create mode 100644 gnu_regex/.svn/text-base/regexec.c.svn-base create mode 100644 gnu_regex/README.txt create mode 100644 gnu_regex/regcomp.c create mode 100644 gnu_regex/regex.c create mode 100644 gnu_regex/regex.h create mode 100644 gnu_regex/regex_internal.c create mode 100644 gnu_regex/regex_internal.h create mode 100644 gnu_regex/regexec.c create mode 100644 html.c create mode 100644 jscript.c create mode 100644 keyword.c create mode 100644 keyword.h create mode 100644 lisp.c create mode 100644 lregex.c create mode 100644 lua.c create mode 100644 mac.c create mode 100644 magic.diff create mode 100644 main.c create mode 100644 main.h create mode 100644 maintainer.mak create mode 100644 make.c create mode 100644 matlab.c create mode 100644 mk_bc3.mak create mode 100644 mk_bc5.mak create mode 100644 mk_djg.mak create mode 100644 mk_manx.mak create mode 100644 mk_mingw.mak create mode 100644 mk_mpw.mak create mode 100644 mk_mvc.mak create mode 100644 mk_os2.mak create mode 100644 mk_qdos.mak create mode 100644 mk_sas.mak create mode 100755 mkinstalldirs create mode 100644 ocaml.c create mode 100644 options.c create mode 100644 options.h create mode 100644 parse.c create mode 100644 parse.h create mode 100644 parsers.h create mode 100644 pascal.c create mode 100644 perl.c create mode 
100644 php.c create mode 100644 python.c create mode 100644 qdos.c create mode 100644 read.c create mode 100644 read.h create mode 100644 readtags.c create mode 100644 readtags.h create mode 100644 rexx.c create mode 100644 routines.c create mode 100644 routines.h create mode 100644 ruby.c create mode 100644 scheme.c create mode 100644 sh.c create mode 100644 slang.c create mode 100644 sml.c create mode 100644 sort.c create mode 100644 sort.h create mode 100644 source.mak create mode 100644 sql.c create mode 100644 strlist.c create mode 100644 strlist.h create mode 100644 tcl.c create mode 100644 tex.c create mode 100644 verilog.c create mode 100644 vhdl.c create mode 100644 vim.c create mode 100644 vstring.c create mode 100644 vstring.h create mode 100644 yacc.c diff --git a/.indent.pro b/.indent.pro new file mode 100644 index 0000000..284f665 --- /dev/null +++ b/.indent.pro @@ -0,0 +1,31 @@ +--blank-before-sizeof +--blank-lines-after-procedures +--brace-indent0 +--braces-after-if-line +--braces-on-struct-decl-line +--break-after-boolean-operator +--case-brace-indentation0 +--case-indentation0 +--comment-indentation0 +--continuation-indentation4 +--cuddle-do-while +--declaration-comment-column0 +--declaration-indentation0 +--dont-break-function-decl-args +--dont-break-procedure-type +--dont-line-up-parentheses +--honour-newlines +--indent-level4 +--line-length80 +--paren-indentation4 +--preprocessor-indentation1 +--no-blank-lines-after-commas +--space-after-cast +--space-after-for +--space-after-if +--space-after-procedure-calls +--space-after-while +--space-special-semicolon +--start-left-side-of-comments +--struct-brace-indentation4 +--tab-size4 diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..60549be --- /dev/null +++ b/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/EXTENDING.html b/EXTENDING.html new file mode 100644 index 0000000..7a7b865 --- /dev/null +++ b/EXTENDING.html @@ -0,0 +1,386 @@ + + + +Exuberant Ctags: Adding support for a new language + + + +

How to Add Support for a New Language to Exuberant Ctags

+ +

+Exuberant Ctags has been designed to make it very easy to add your own +custom language parser. As an exercise, let us assume that I want to add +support for my new language, Swine, the successor to Perl (i.e. Perl +before Swine <wince>). This language consists of simple definitions of +labels in the form "def my_label". Let us now examine the various +ways to do this. +

+ +

Operational background

+ +

+As ctags considers each file name, it tries to determine the language of the +file by applying the following three tests in order: if the file extension has +been mapped to a language, if the file name matches a shell pattern mapped to +a language, and finally if the file is executable and its first line specifies +an interpreter using the Unix-style "#!" specification (if supported on the +platform). If a language was identified, the file is opened and then the +appropriate language parser is called to operate on the currently open file. +The parser parses through the file and whenever it finds some interesting +token, calls a function to define a tag entry. +

+ +

Creating a user-defined language

+ +

+The quickest and easiest way to do this is by defining a new language using +the program options. In order to have Swine support available every time I +start ctags, I will place the following lines into the file +$HOME/.ctags, which is read in every time ctags starts: + + +

+  --langdef=swine
+  --langmap=swine:.swn
+  --regex-swine=/^def[ \t]*([a-zA-Z0-9_]+)/\1/d,definition/
+
+ +The first line defines the new language, the second maps a file extension to +it, and the third defines a regular expression to identify a language +definition and generate a tag file entry for it. +

+ +

Integrating a new language parser

+ +

+Now suppose that I want to truly integrate compiled-in support for Swine into +ctags. First, I create a new module, swine.c, and add one +externally visible function to it, extern parserDefinition +*SwineParser(void), and add its name to the table in +parsers.h. The job of this parser definition function is to +create an instance of the parserDefinition structure (using +parserNew()) and populate it with information defining how files +of this language are recognized, what kinds of tags it can locate, and the +function used to invoke the parser on the currently open file. +

+ +

+The structure parserDefinition allows assignment of the following +fields: + + +

+  const char *name;               /* name of language */
+  kindOption *kinds;              /* tag kinds handled by parser */
+  unsigned int kindCount;         /* size of `kinds' list */
+  const char *const *extensions;  /* list of default extensions */
+  const char *const *patterns;    /* list of default file name patterns */
+  parserInitialize initialize;    /* initialization routine, if needed */
+  simpleParser parser;            /* simple parser (common case) */
+  rescanParser parser2;           /* rescanning parser (unusual case) */
+  boolean regex;                  /* is this a regex parser? */
+
+ +

+ +

+The name field must be set to a non-empty string. Also, unless +regex is set true (see below), either parser or +parser2 must be set to point to a parsing routine which will +generate the tag entries. All other fields are optional. + +

+Now all that is left is to implement the parser. In order to do its job, the +parser should read the file stream using one of the two I/O interfaces: +either the character-oriented fileGetc(), or the line-oriented +fileReadLine(). When using fileGetc(), the parser +can put back a character using fileUngetc(). How our Swine parser +actually parses the contents of the file is entirely up to the writer of the +parser--it can be as crude or elegant as desired. You will note a variety of +examples from the most complex (c.c) to the simplest (make.c). +

+ +

+When the Swine parser identifies an interesting token for which it wants to +add a tag to the tag file, it should create a tagEntryInfo +structure and initialize it by calling initTagEntry(), which +initializes defaults and fills information about the current line number and +the file position of the beginning of the line. After filling in information +defining the current entry (and possibly overriding the file position or other +defaults), the parser passes this structure to makeTagEntry(). +

+ +

+Instead of writing a character-oriented parser, it may be possible to specify +regular expressions which define the tags. In this case, instead of defining a +parsing function, SwineParser() sets regex to true, +and points initialize to a function which calls +addTagRegex() to install the regular expressions which define its +tags. The regular expressions thus installed are compared against each line +of the input file and generate a specified tag when matched. It is usually +much easier to write a regex-based parser, although they can be slower (one +parser example was 4 times slower). Whether the speed difference matters to +you depends upon how much code you have to parse. It is probably a good +strategy to implement a regex-based parser first, and if it is too slow for +you, then invest the time and effort to write a character-based parser. +

+ +

+A regex-based parser is inherently line-oriented (i.e. the entire tag must be +recognizable from looking at a single line) and context-insensitive (i.e. the +generation of the tag is entirely based upon when the regular expression +matches a single line). However, a regex-based callback mechanism is also +available, installed via the function addCallbackRegex(). This +allows a specified function to be invoked whenever a specific regular +expression is matched. This allows a character-oriented parser to operate +based upon context of what happened on a previous line (e.g. the start or end +of a multi-line comment). Note that regex callbacks are called just before the +first character of that line is read via either fileGetc() or +fileReadLine(). The effect of this is that before either of +these routines return, a callback routine may be invoked because the line +matched a regex callback. A callback function to be installed is defined by +these types: + + +

+  typedef void (*regexCallback) (const char *line, const regexMatch *matches, unsigned int count);
+  /* One element of the matches array handed to a regexCallback. */
+  typedef struct {
+      size_t start;   /* character index in line where match starts */
+      size_t length;  /* length of match */
+  } regexMatch;
+
+ +

+ +

+The callback function is passed the line matching the regular expression and +an array of count structures defining the subexpression matches +of the regular expression, starting from \0 (the text matched by the entire expression). +

+ +

+Lastly, be sure to add the name of the file containing your parser (e.g. +swine.c) to the macro SOURCES in the file source.mak +and an entry for the object file to the macro OBJECTS in the same +file, so that your new module will be compiled into the program. +

+ +

+This is all there is to it. All other details are specific to the parser and +how it wants to do its job. There are some support functions which can take +care of some commonly needed parsing tasks, such as keyword table lookups (see +keyword.c), which you can make use of if desired (examples of its use can be +found in c.c, eiffel.c, and fortran.c). Almost everything is already taken care +of automatically for you by the infrastructure. Writing the actual parsing +algorithm is the hardest part, but is not constrained by any need to conform +to anything in ctags other than that mentioned above. +

+ +

+There are several different approaches used in the parsers inside Exuberant +Ctags and you can browse through these as examples of how to go about +creating your own. +

+ +

Examples

+ +

+Below you will find several example parsers demonstrating most of the +facilities available. These include three alternative implementations +of a Swine parser, which generate tags for lines beginning with +"def" followed by some name. +

+ + +
+/***************************************************************************
+ * swine.c
+ * Character-based parser for Swine definitions
+ **************************************************************************/
+/* INCLUDE FILES */
+#include "general.h"    /* always include first */
+
+#include <string.h>     /* to declare strxxx() functions */
+#include <ctype.h>      /* to define isxxx() macros */
+
+#include "parse.h"      /* always include */
+#include "read.h"       /* to define file fileReadLine() */
+
+/* DATA DEFINITIONS */
+typedef enum eSwineKinds {
+    K_DEFINE    /* passed with SwineKinds to makeSimpleTag(); presumably the index of the entry below */
+} swineKind;
+/* Tag kinds for Swine; fields are presumably enabled, letter, name, description -- confirm in parse.h */
+static kindOption SwineKinds [] = {
+    { TRUE, 'd', "definition", "pig definition" }
+};
+
+/* FUNCTION DEFINITIONS */
+
+/* Reads the input line by line and makes a K_DEFINE tag for every line
+ * which starts with "def", whitespace and then a name made up of
+ * alphanumeric characters and underscores. */
+static void findSwineTags (void)
+{
+    /* Scratch buffer reused for each name which is found. */
+    vString *const ident = vStringNew ();
+    const unsigned char *text;
+
+    for (text = fileReadLine ();  text != NULL;  text = fileReadLine ())
+    {
+        const unsigned char *p;
+
+        /* Only lines starting with "def" plus whitespace are of interest. */
+        if (strncmp ((const char*) text, "def", (size_t) 3) != 0)
+            continue;
+        if (! isspace ((int) text [3]))
+            continue;
+
+        /* Skip any further whitespace ahead of the name. */
+        for (p = text + 4;  isspace ((int) *p);  ++p)
+            ;
+
+        /* Collect the characters making up the name. */
+        for ( ;  isalnum ((int) *p)  ||  *p == '_';  ++p)
+            vStringPut (ident, (int) *p);
+
+        vStringTerminate (ident);
+        makeSimpleTag (ident, SwineKinds, K_DEFINE);
+        vStringClear (ident);
+    }
+    vStringDelete (ident);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* SwineParser (void)
+{
+    /* NULL-terminated list of file name extensions for this parser. */
+    static const char *const extensions [] = { "swn", NULL };
+    parserDefinition* const swine = parserNew ("Swine");
+
+    /* Tags are generated by the character-based scanner above. */
+    swine->parser     = findSwineTags;
+    swine->extensions = extensions;
+    /* Table of tag kinds and the number of entries it holds. */
+    swine->kinds      = SwineKinds;
+    swine->kindCount  = KIND_COUNT (SwineKinds);
+    return swine;
+}
+
+
+ +

+

+
+/***************************************************************************
+ * swine.c
+ * Regex-based parser for Swine
+ **************************************************************************/
+/* INCLUDE FILES */
+#include "general.h"    /* always include first */
+#include "parse.h"      /* always include */
+
+/* FUNCTION DEFINITIONS */
+
+/* Installs the regular expression which generates Swine tags: a line
+ * beginning with "def", followed by whitespace and a name, yields a tag
+ * named after the first subexpression (\1) with kind "d,definition".
+ * At least one blank or tab is required after "def" so that words merely
+ * starting with those letters (e.g. "default") produce no tag, which keeps
+ * this variant in agreement with the character-based and callback-based
+ * implementations of the Swine parser. */
+static void installSwineRegex (const langType language)
+{
+    addTagRegex (language, "^def[ \t]+([a-zA-Z0-9_]+)", "\\1", "d,definition", NULL);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* SwineParser (void)
+{
+    /* NULL-terminated list of file name extensions for this parser. */
+    static const char *const extensions [] = { "swn", NULL };
+    parserDefinition* def = parserNew ("Swine");
+    def->extensions = extensions;
+    /* No parser function is set: the tags come solely from the regular
+     * expression installed by the initialize routine. */
+    def->initialize = installSwineRegex;
+    def->regex      = TRUE;
+    return def;
+}
+
+
+ +

+

+/***************************************************************************
+ * swine.c
+ * Regex callback-based parser for Swine definitions
+ **************************************************************************/
+/* INCLUDE FILES */
+#include "general.h"    /* always include first */
+
+#include "parse.h"      /* always include */
+#include "read.h"       /* to define file fileReadLine() */
+
+/* DATA DEFINITIONS */
+typedef enum eSwineKinds {
+    K_DEFINE    /* passed with SwineKinds to makeSimpleTag(); presumably the index of the entry below */
+} swineKind;
+/* Tag kinds for Swine; fields are presumably enabled, letter, name, description -- confirm in parse.h */
+static kindOption SwineKinds [] = {
+    { TRUE, 'd', "definition", "pig definition" }
+};
+
+/* FUNCTION DEFINITIONS */
+
+/* Regex callback, invoked for each line matching the pattern installed by
+ * installSwine(). `line' is the matching line and `matches' holds `count'
+ * entries; matches [1] locates the first parenthesized subexpression,
+ * which is the name being defined. A K_DEFINE tag is made for that name. */
+static void definition (const char *const line, const regexMatch *const matches,
+                       const unsigned int count)
+{
+    if (count > 1)    /* should always be true per regex */
+    {
+        vString *const name = vStringNew ();
+        vStringNCopyS (name, line + matches [1].start, matches [1].length);
+        makeSimpleTag (name, SwineKinds, K_DEFINE);
+        /* The buffer is ours to release once the tag has been made;
+         * omitting this leaks one vString per matching line. */
+        vStringDelete (name);
+    }
+}
+
+/* Consumes the input one line at a time. Nothing else is needed here,
+ * since reading a line is what causes the regex callback to be invoked. */
+static void findSwineTags (void)
+{
+    for (;;)
+    {
+        if (fileReadLine () == NULL)
+            break;    /* no more lines */
+    }
+}
+
+/* Registers definition() as the callback for lines which begin with "def",
+ * followed by whitespace and a name (the first subexpression). */
+static void installSwine (const langType language)
+{
+    static const char *const pattern = "^def[ \t]+([a-zA-Z0-9_]+)";
+
+    addCallbackRegex (language, pattern, NULL, definition);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* SwineParser (void)
+{
+    /* NULL-terminated list of file name extensions for this parser. */
+    static const char *const extensions [] = { "swn", NULL };
+    parserDefinition* const swine = parserNew ("Swine");
+
+    /* The initialize routine installs the regex callback, while the parser
+     * routine reads the lines which trigger it. */
+    swine->initialize = installSwine;
+    swine->parser     = findSwineTags;
+    swine->extensions = extensions;
+    /* Table of tag kinds and the number of entries it holds. */
+    swine->kinds      = SwineKinds;
+    swine->kindCount  = KIND_COUNT (SwineKinds);
+    return swine;
+}
+
+ +

+

+/***************************************************************************
+ * make.c
+ * Regex-based parser for makefile macros
+ **************************************************************************/
+/* INCLUDE FILES */
+#include "general.h"    /* always include first */
+#include "parse.h"      /* always include */
+
+/* FUNCTION DEFINITIONS */
+
+/* Installs the regular expression which tags makefile macros: a name at the
+ * start of a line or after a blank or tab, followed by "=" or ":=". The
+ * second subexpression (\2) names the tag, whose kind is "m,macro".
+ * NOTE(review): the "i" flag presumably requests case-insensitive matching
+ * -- confirm against the regex support code. */
+static void installMakefileRegex (const langType language)
+{
+    static const char *const macroPattern = "(^|[ \t])([A-Z0-9_]+)[ \t]*:?=";
+
+    addTagRegex (language, macroPattern, "\\2", "m,macro", "i");
+}
+
+/* Create parser definition structure */
+extern parserDefinition* MakefileParser (void)
+{
+    /* NULL-terminated lists of the file name extensions and file name
+     * patterns for this parser. */
+    static const char *const extensions [] = { "mak", NULL };
+    static const char *const patterns [] = { "[Mm]akefile", NULL };
+    parserDefinition* const makefile = parserNew ("Makefile");
+
+    /* Regex-based parser: only an initialize routine is supplied. */
+    makefile->regex      = TRUE;
+    makefile->initialize = installMakefileRegex;
+    makefile->extensions = extensions;
+    makefile->patterns   = patterns;
+    return makefile;
+}
+
+ + + diff --git a/FAQ b/FAQ new file mode 100644 index 0000000..b62d4ec --- /dev/null +++ b/FAQ @@ -0,0 +1,371 @@ +Frequently Asked Questions +========================== + + * 1. Why do you call it "Exuberant Ctags"? + * 2. Why doesn't my editor work with these tag files? + * 3. What are these strange bits of text beginning with ;"? + * 4. Why doesn't XEmacs' Speedbar module work with Exuberant Ctags? + * 5. Why doesn't Xemacs correctly locate the tag in the source file? + * 6. Why doesn't NEdit correctly locate the tag in the source file? + * 7. Why can't I jump to "class::member"? + * 8. How can I avoid having to specify my favorite option every time? + * 9. Why do I end up on the wrong line when I jump to a tag? + * 10. How do I jump to the tag I want instead of the wrong one by the + same name? + * 11. What is "Vim"? + * 12. How can I locate all references to a specific function or variable? + * 13. Why does appending tags to a tag file tag so long? + * 14. How do I get regex support for Win32? + * 15. How should I set up tag files for a multi-level directory hierarchy? + + ---------------------------------------------------------------------- +1. Why do you call it "Exuberant Ctags"? + +Because one of the meanings of the word "exuberant" is: + + exuberant : produced in extreme abundance : PLENTIFUL syn see PROFUSE + +Compare the tag file produced by Exuberant Ctags with that produced by any +other ctags and you will see how appropriate the name is. + + ---------------------------------------------------------------------- +2. Why doesn't my editor work with these tag files? + +3. What are these strange bits of text beginning with ;" which follow + many of the lines in the tag file? + +These are "extension flags". They are added in order to provide extra +information about the tag that may be utilized by the editor in order to +more intelligently handle tags. 
They are appended to the EX command part of +the tag line in a manner that provides backwards compatibility with existing +implementations of the Vi editor. The semicolon is an EX command separator +and the double quote begins an EX comment. Thus, the extension flags appear +as an EX comment and should be ignored by the editor when it processes the +EX command. + +Some non-vi editors, however, implement only the bare minimum of EX commands +in order to process the search command or line number in the third field of +the tag file. If you encounter this problem, use the option "--format=1" to +generate a tag file without these extensions (remember that you can set the +CTAGS environment variable to any default arguments you wish to supply). Then +ask the supplier of your editor to implement handling of this feature of EX +commands. + + ---------------------------------------------------------------------- +4. Why doesn't XEmacs' Speedbar module work with Exuberant Ctags? + +The default command line switches used by XEmacs for "etags" are not +compatible with Exuberant Ctags options. By default, Exuberant Ctags installs +a symbolic link, "etags", pointing to the ctags executable. When Exuberant +Ctags is started with the name "etags", it produces Emacs-style tag files by +default. + +To fix this, add the following lines to your .emacs file, replacing the path +to "etags" with the path where the symbolic link was installed. + +(autoload 'speedbar "speedbar") +(setq speedbar-fetch-etags-command "/usr/local/bin/etags" + speedbar-fetch-etags-arguments '("-f" "-")) + + ---------------------------------------------------------------------- +5. Why doesn't Xemacs correctly locate the tag in the source file? + +This has been observed with version 20.3. It seems that when Xemacs searches +for a tag, it searches using the tag name instead of the search string located +in the TAGS file. This is a bug in Xemacs and does not occur in the GNU +version of Emacs. 
+ + ---------------------------------------------------------------------- +6. Why doesn't NEdit correctly locate the tag in the source file? + +Versions of NEdit prior to 5.1 did not support the extended tag file format +generated by Exuberant Ctags by default. Either upgrade to version 5.1 or +specify the option "--format=1" when running ctags to output the old tag file +format. + + ---------------------------------------------------------------------- +7. Why can't I jump to "class::member"? + +Because, by default, ctags only generates tags for the separate identifiers +found in the source files. If you specify the --extra=+q option, then +ctags will also generate a second, class-qualified tag for each class member +(data and function/method) in the form class::member for C++, and in the form +class.method for Eiffel and Java. + + ---------------------------------------------------------------------- +8. How can I avoid having to specify my favorite option every time? + +Either by setting the environment variable CTAGS to your custom +options, or putting them into a .ctags file in your home directory. + + ---------------------------------------------------------------------- +9. Why do I end up on the wrong line when I jump to a tag? + +By default, ctags encodes the line number in the file where macro (#define) +tags are found. This was done to remain compatible with the original UNIX +version of ctags. If you change the file containing the tag without +rebuilding the tag file, the location of tag in the tag file may no longer +match the current location. + +In order to avoid this problem, you can specify the option "--excmd=p", +which causes ctags to use a search pattern to locate macro tags. I have +never uncovered the reason why the original UNIX ctags used line numbers +exclusively for macro tags, but have so far resisted changing the default +behaviour of Exuberant Ctags to behave differently. 
+ + ---------------------------------------------------------------------- +10. How do I jump to the tag I want instead of the wrong one by the + same name? + +A tag file is simply a list of tag names and where to find them. If there +are duplicate entries, you often end up going to the wrong one because the +tag file is sorted and your editor locates the first one in the tag file. + +Standard Vi provides no facilities to alter this behavior. However, Vim +has some nice features to minimize this problem, primarily by examining all +matches and choosing the best one under the circumstances. Vim also provides +commands which allow for selection of the desired matching tag. + + ---------------------------------------------------------------------- +11. What is "Vim"? + +Vim is a vi-compatible editor available as source and compilable for any +platform. Yeah, I know the first reaction is to shy away from this. But you +will never regret getting it, and you will become greatly attached to its +features, which you can learn gradually. I would be willing to say that it +is the best vi-clone available within 4 light-years of Alpha Centauri. It +works (nearly) exactly like standard vi, but provides some incredibly useful +extensions (some of which I have participated in designing with the author). +Most Linux distributions have adopted Vim as their standard vi. + + ---------------------------------------------------------------------- +12. How can I locate all references to a specific function or variable? + +There are several packages already available which provide this capability. +Namely, these are: GLOBAL source code tag system, GNU id-utils, cscope, +and cflow.
As of this writing, they can be found in the following locations: + +GLOBAL: http://www.gnu.org/software/global +id-utils: http://www.gnu.org/software/idutils/idutils.html +cscope: http://cscope.sourceforge.net +cflow: ftp://www.ibiblio.org/pub/Linux/devel/lang/c + + ---------------------------------------------------------------------- +13. Why does appending tags to a tag file take so long? + +Sometimes, in an attempt to build a global tag file for all source files in +a large source tree of many directories, someone will make an attempt to run +ctags in append (-a) mode on every directory in the hierarchy. Each time +ctags is invoked, its default behavior is to sort the tag file once the tags +for that execution have been added. As the cumulative tag file grows, the sort +time increases arithmetically. + +The best way to avoid this problem (and the most efficient) is to make +use of the --recurse (or -R) option of ctags by executing the following +command in the root of the directory hierarchy (thus running ctags only once): + + ctags -R + +If you really insist on running ctags separately on each directory, you can +avoid the sort pass each time by specifying the option "--sort=no". Once the +tag file is completely built, use the sort command to manually sort the +final tag file, or let the final invocation of ctags sort the file. + + ---------------------------------------------------------------------- +14. How do I get regex support for Win32? + +You need to download the GNU regex package for Win32 from the following +location: + + http://people.delphiforums.com/gjc/gnu_regex.html + +Then point the makefile macro, REGEX_DIR, found in mk_mvc.mak and mk_bc5.mak, +to the directory created by extracting this archive. + + ---------------------------------------------------------------------- +15. How should I set up tag files for a multi-level directory hierarchy? + +There are a few ways of approaching this: + +1.
A local tag file in each directory containing only the tags for source + files in that directory. + +2. One single big, global tag file present in the root directory of your + hierarchy, containing all tags present in all source files in the + hierarchy. + +3. A local tag file in each directory containing only the tags for source + files in that directory, in addition to one single global tag file + present in the root directory of your hierarchy, containing all + non-static tags present in all source files in the hierarchy. + +4. A local tag file in each directory of the hierarchy, each one + containing all tags present in source files in that directory and all + non-static tags in every directory below it (note that this implies + also having one big tag file in the root directory of the hierarchy). + +Each of these approaches has its own set of advantages and disadvantages, +depending upon your particular conditions. Which approach is deemed best +depends upon the following factors: + +A. The ability of your editor to use multiple tag files. + + If your editor cannot make use of multiple tag files (original vi + implementations could not), then one large tag file is the only way to + go if you ever desire to jump to tags located in other directories. If + you never need to jump to tags in another directory (i.e. the source + in each directory is entirely self-contained), then a local tag file + in each directory will fit your needs. + +B. The time is takes for your editor to look up a tag in the tag file. + + The significance of this factor depends upon the size of your source + tree and on whether the source files are located on a local or remote + file system. For source and tag files located on a local file system, + looking up a tag is not as big a hit as one might first imagine, since + vi implementations typically perform a binary search on a sorted tag + file. This may or may not be true for the editor you use. 
For files + located on a remote file system, reading a large file is an expensive + operation. + +C. Whether or not you expect the source code to change and the time it + takes to rebuild a tag file to account for changes to the source code. + + While Exuberant Ctags is particularly fast in scanning source code + (around 1-2 MB/sec), a large project may still result in objectionable + delays if one wishes to keep their tag file(s) up to date on a + frequent basis, or if the files are located on a remote file system. + +D. The presence of duplicate tags in the source code and the ability to + handle them. + + The impact of this factor is influenced by the following three issues: + + 1. How common are duplicate tags in your project? + + 2. Does your editor provide any facilities for dealing with duplicate + tags? + + While standard vi does not, many modern vi implementations, such + as Vim have good facilities for selecting the desired match from + the list of duplicates. If your editor does not support duplicate + tags, then it will typically send you to only one of them, whether + or not that is the one you wanted (and not even notifying you that + there are other potential matches). + + 3. What is the significance of duplicate tags? + + For example, if you have two tags of the same name from entirely + isolated software components, jumping first to the match found + in component B while working in component A may be entirely + misleading, distracting or inconvenient (to keep having to choose + which one if your editor provides you with a list of matches). + However, if you have two tags of the same name for parallel builds + (say two initialization routines for different hosts), you may + always want to specify which one you want. + +Of the approaches listed above, I tend to favor Approach 3. My editor of +choice is Vim, which provides a rich set of features for handling multiple +tag files, which partly influences my choice. 
If you are working with +source files on a remote file system, then I would recommend either +Approach 3 or Approach 4, depending upon the hit when reading the global +tag file. + +The advantages of Approach 3 are many (assuming that your editor has +the ability to support both multiple tag files and duplicate tags). All +lookups of tag located in the currect directory are fast and the local +tag file can be quickly and easily regenerated in one second or less +(I have even mapped a keystroke to do this easily). A lookup of a +(necessarily non-static) tag found in another directory fails a lookup in +the local tag file, but is found in the global tag file, which satisfies +all cross-directory lookups. The global tag file can be automatically +regenerated periodically with a cron job (and perhaps the local tag files +also). + +Now I give an example of how you would implement Approach 3. Means of +implementing the other approaches can be performed in a similar manner. + +Here is a visual representation of an example directory hierarchy: + +project + `-----misccomp + | `... + `-----sysint + `-----client + | `-----hdrs + | `-----lib + | `-----src + | `-----test + `-----common + | `-----hdrs + | `-----lib + | `-----src + | `-----test + `-----server + `-----hdrs + `-----lib + `-----src + `-----test + +Here is a recommended solution (conceptually) to build the tag files: + +1. Within each of the leaf nodes (i.e. hdrs, lib, src, test) build a tag + file using "ctags *.[ch]". This can be easily be done for the whole + hierarchy by making a shell script, call it "dirtags", containing the + following lines: + + #!/bin/sh + cd $1 + ctags * + + Now execute the following command: + + find * -type d -exec dirtags {} \; + + These tag files are trivial (and extremely quick) to rebuild while + making changes within a directory. 
The following Vim key mapping is + quite useful to rebuild the tag file in the directory of the current + source file: + + :nmap ,t :!(cd %:p:h;ctags *.[ch])& + +2. Build the global tag file: + + cd ~/project + ctags --file-scope=no -R + + thus constructing a tag file containing only non-static tags for all + source files in all descendent directories. + +3. Configure your editor to read the local tag file first, then consult + the global tag file when not found in the local tag file. In Vim, + this is done as follows: + + :set tags=./tags,tags,~/project/tags + +If you wish to implement Approach 4, you would need to replace the +"dirtags" script of step 1 with the following: + + #!/bin/sh + cd $1 + ctags * + # Now append the non-static tags from descendent directories + find * -type d -prune -print | ctags -aR --file-scope=no -L- + +And replace the configuration of step 3 with this: + + :set tags=./tags,./../tags,./../../tags,./../../../tags,tags + +As a caveat, it should be noted that step 2 builds a global tag file whose +file names will be relative to the directory in which the global tag file +is being built. This takes advantage of the Vim 'tagrelative' option, +which causes the path to be interpreted a relative to the location of the +tag file instead of the current directory. For standard vi, which always +interprets the paths as relative to the current directory, we need to +build the global tag file with absolute path names. This can be +accomplished by replacing step 2 with the following: + + cd ~/project + ctags --file-scope=no -R `pwd` + +-- diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..9f059c2 --- /dev/null +++ b/INSTALL @@ -0,0 +1,215 @@ +Custom Installation +=================== + +These installation instructions are for Unix or Unix-like platforms (or at +least, those platforms which are able to run a Bourne shell script). If you +are attempting to install Exuberant Ctags on some other platform, see the file +INSTALL.oth. 
+ +If you are not familiar with using the configure scripts generated by GNU +autoconf, read the "Basic Installation" section below; then return here. +The configure script in this package supports the following custom options: + + --disable-etags By default, "make install" will install one + binary, "ctags", one man page, "ctags.1", and + create links to these two files by the names + "etags" and "etags.1". If you do not want to + install the "etags" links, use this option. + + --disable-extended-format Ctags now appends "extension flags" to the + end of each tag entry in a manner which is + backwards with original Vi implementation + (they are placed into an EX comment). This + can be disabled via use of the ctags --format + option. This configure option changes the + default behavior of ctags to disable use of + these extension flags (i.e. use the original + tag file format). + + --disable-external-sort Use this option to force use of an internal + sort algorithm. On UNIX-like systems, ctags + uses the sort utility of the operating system + by default because it is more memory efficient. + + --enable-custom-config=FILE Defines a custom option configuration file to + establish site-wide defaults. Ctags will read + the following files at startup for options: + /etc/ctags.conf, /usr/local/etc/ctags.conf, + $HOME/.ctags, and .ctags. If you need a + different file, set this option to the full + path name of the file you want to be read, and + it will be read immediately before reading + $HOME/.ctags. + + --enable-macro-patterns By default, line numbers are used in the tag + file for #define objects, in order to remain + compatible with the original UNIX ctags. This + option will make the default use patterns. + + --enable-maintainer-mode Creates a special GNU-specific version of the + makefile which is used to maintain Exuberant + Ctags. 
+ + --enable-tmpdir=DIR When the library function mkstemp() is + available, this option allows specifying the + default directory to use for temporary files + generated by ctags. This default can be + changed at run time by setting the environment + variable TMPDIR. + +If you wish to change the name of the installed files, edit the makefile +produced by the configure script ("Makefile") before performing the "make +install" step. There are two lines at the top of the file where the names of +the installed files may be customized. + + +Basic Installation +================== + + These are generic installation instructions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, a file +`config.cache' that saves the results of its tests to speed up +reconfiguring, and a file `config.log' containing compiler output +(useful mainly for debugging `configure'). + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If at some point `config.cache' +contains results you don't want to keep, you may remove or edit it. + + The file `configure.in' is used to create `configure' by a program +called `autoconf'. You only need `configure.in' if you want to change +it or regenerate `configure' using a newer version of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. 
If you're + using `csh' on an old version of System V, you might need to type + `sh ./configure' instead to prevent `csh' from trying to execute + `configure' itself. + + Running `configure' takes awhile. While running, it prints some + messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. You can give `configure' +initial values for variables by setting them in the environment. Using +a Bourne-compatible shell, you can do that on the command line like +this: + CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure + +Or on systems that have the `env' program, you can do it like this: + env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you must use a version of `make' that +supports the `VPATH' variable, such as GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. 
+ + If you have to use a `make' that does not support the `VPATH' +variable, you have to compile the package for one architecture at a time +in the source code directory. After you have installed the package for +one architecture, use `make distclean' before reconfiguring for another +architecture. + +Installation Names +================== + + By default, `make install' will install the package's files in +`/usr/local/bin', `/usr/local/man', etc. You can specify an +installation prefix other than `/usr/local' by giving `configure' the +option `--prefix=PATH'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +give `configure' the option `--exec-prefix=PATH', the package will use +PATH as the prefix for installing programs and libraries. +Documentation and other data files will still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=PATH' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + +Optional Features +================= + + Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + +Sharing Defaults +================ + + If you want to set default values for `configure' scripts to share, +you can create a site shell script called `config.site' that gives +default values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists.
Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Operation Controls +================== + + `configure' recognizes the following options to control how it +operates. + +`--cache-file=FILE' + Use and save the results of the tests in FILE instead of + `./config.cache'. Set FILE to `/dev/null' to disable caching, for + debugging `configure'. + +`--help' + Print a summary of the options to `configure', and exit. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`--version' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`configure' also accepts some other, not widely useful, options. + diff --git a/INSTALL.oth b/INSTALL.oth new file mode 100644 index 0000000..8577c7e --- /dev/null +++ b/INSTALL.oth @@ -0,0 +1,73 @@ +If you are attempting to install Exuberant Ctags on a Unix-like platform +(one that can at least run a Bourne shell script) see the file INSTALL. 
+ +Installation Notes +================== + +For non-Unix platforms, simple makefiles are provided: + + descrip.mms For VMS using either DEC C or VAX C + mk_bc3.mak For MSDOS using Borland C/C++ 3.x + mk_bc5.mak For Win32 using Borland C++ 5.5 + mk_djg.mak For MSDOS using DJGPP Gnu GCC (better to follow Unix install) + mk_manx.mak For Amiga using Aztec/Manx C 5.0 + mk_mingw.mak For Win32 using MinGW + mk_mpw.mak For Macintosh using MPW + mk_mvc.mak For Win32 using Microsoft Visual C++ + mk_os2.mak For OS/2 using GCC (EMX) + mk_qdos.mak For QDOS using C68 + mk_riscos.mak For RISC OS using the GCC SDK + mk_sas.mak For Amiga using SAS/C + + +Special Notes +============= + +DJGPP: +------ + + It is better to follow the standard Unix install on DJGPP, but this requires + that you use BASH and requires a fairly complete installation of DJGPP + packages. You can use mk_djg.mak if you can't run configure. + +Macintosh with MPW: +------------------- + + To build Ctags on MPW you will have to have the Metrowerks compilers + or you will have to edit the makefiles yourself. + + Since the makefile for MPW is called mk_mpw.mak you will have to give + the command: `Make -f mk_mpw.mak CTags` and then select the output + and execute it. Alternatively you could rename this makefile to + CTags.make and issue the command `Build CTags`. + + If the build process goes wrong make sure the line endings for the + makefile are set to MacOS (CR instead of LF). + + This version of Ctags only accepts and emits unix style paths. This was + done since it was easier to implement (few changes in main source files) + and since I ported Ctags to MPW to use with Pepper which also works with + unix style paths internally. Besides, since we're now moving to MacOS X + anyway, Mac programmers are getting used to unix style paths anyway. 
+ + Example, suppose you have a project using PowerPlant which is located in + 'HD20:tools:PowerPlant:' and you want a tags file for all powerplant + sources and your project's sources. Go to the root directory of your + project and type: + + CTags -R . '/HD20/tools/PowerPlant/' + + The '.' in this command means current directory. You will also have to + put quotes around the full unix style path since / has a special meaning + in MPW. + +RISC OS: +-------- + + Regex support on RISC OS is available when compiled to use the RISC OS port + of the GNU regex library, which can be obtained from: + + + + Using 'Set RexEx$Path .' in the regex-0/12 directory will ensure + it can be found. diff --git a/MAINTAINERS b/MAINTAINERS new file mode 100644 index 0000000..d46b50b --- /dev/null +++ b/MAINTAINERS @@ -0,0 +1,88 @@ +The following individuals are registered as developers for the maintenance of +Exuberant Ctags. They are listed by their SourceForge username and by the parser(s) they maintain. +To send email to any one of them, send it to USERNAME@users.sourceforge.net. 
+ +Ctags SourceForge Full +Parser username Name +---------- ----------- ----- +Ant dfishburn David Fishburn +AWK jkoshy Joseph Koshy +Basic elias Elias Pschernig +C# elliotth Elliott Hughes +DosBatch dfishburn David Fishburn +Flex dfishburn David Fishburn +Java elliotth Elliott Hughes +JavaScript dfishburn David Fishburn +MATLAB dfishburn David Fishburn +OCaml vberthoux Vincent Berthoux +Perl perlguy0 Dmitri Tikhonov +PHP jafl John Lindal +Python elias Elias Pschernig +Ruby elliotth Elliott Hughes +SML jkoshy Joseph Koshy +SQL dfishburn David Fishburn +TeX dfishburn David Fishburn +Vim dfishburn David Fishburn +All else dhiebert Darren Hiebert + +How To Build & Test Like A Maintainer +===================================== + +Prerequisites +------------- + + Debian/Ubuntu: + + sudo apt-get install build-essential subversion autoconf + + Mac OS: + + Install the Xcode developer tools, available here: + http://developer.apple.com/tools/download/ + + RedHat: + + up2date --nosig subversion autoheader autoconf + + Windows: + + Install Cygwin plus its Subversion and GNU Make packages. + +Building +-------- + + First time: + + svn co https://ctags.svn.sourceforge.net/svnroot/ctags/trunk ctags + # Miss off the "/trunk" above if you want access to old releases or the + # web site. + cd ctags + autoheader + autoconf + ./configure --enable-maintainer-mode + make -j + + Later: + + cd ctags + svn update + make -j + +Testing +------- + + First time: + + cd ctags + cp /usr/bin/ctags ctags.ref + # Edit "testing.mak" so CTAGS_TEST = ./dctags + # Edit "testing.mak" so CTAGS_REF = ./ctags.ref + make test + + Later: + + make test + + Whenever you're happy with the results and update/commit: + + cp ./dctags ./ctags.ref diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..c24764e --- /dev/null +++ b/Makefile.in @@ -0,0 +1,222 @@ +# $Id: Makefile.in 709 2009-07-04 05:29:28Z dhiebert $ +# +# Makefile for UNIX-like platforms. 
+# + +# These are the names of the installed programs, in case you wish to change +# them. +# +CTAGS_PROG = ctags +ETAGS_PROG = etags + +# Set this to the path to your shell (must run Bourne shell commands). +# +SHELL = /bin/sh + +# GNU Autoconf variables. These are set by the "configure" script when it +# runs. +# +exec_prefix = @exec_prefix@ +datarootdir = @datarootdir@ +prefix = @prefix@ +bindir = @bindir@ +srcdir = @srcdir@ +libdir = @libdir@ +incdir = @includedir@ +mandir = @mandir@ +SLINK = @LN_S@ +STRIP = @STRIP@ +CC = @CC@ +DEFS = @DEFS@ +CFLAGS = @CFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ +EXEEXT = @EXEEXT@ +OBJEXT = @OBJEXT@ + +# If you cannot run the "configure" script to set the variables above, then +# uncomment the defines below and customize them for your environment. If +# your system does not support symbolic (soft) links, then remove the -s +# from SLINK. +# +#srcdir = . +#bindir = /usr/local/bin +#mandir = /usr/local/man +#SLINK = ln -s +#STRIP = strip +#CC = cc +#DEFS = -DHAVE_CONFIG_H +#CFLAGS = -O +#LDFLAGS= + +include $(srcdir)/source.mak + +# +#--- You should not need to modify anything below this line. 
---# +# + +.SUFFIXES: +.SUFFIXES: .c .$(OBJEXT) + +VPATH = $(srcdir) + +INSTALL = cp +INSTALL_PROG = $(INSTALL) +INSTALL_DATA = $(INSTALL) + +READ_LIB = readtags.$(OBJEXT) +READ_INC = readtags.h + +MANPAGE = ctags.1 + +AUTO_GEN = configure config.h.in +CONFIG_GEN = config.cache config.log config.status config.run config.h Makefile + +# +# names for installed man pages +# +manext = 1 +man1dir = $(mandir)/man1 +CMAN = $(CTAGS_PROG).$(manext) +EMAN = $(ETAGS_PROG).$(manext) + +# +# destinations for installed files +# +CTAGS_EXEC = $(CTAGS_PROG)$(EXEEXT) +ETAGS_EXEC = $(ETAGS_PROG)$(EXEEXT) +DEST_CTAGS = $(bindir)/$(CTAGS_EXEC) +DEST_ETAGS = $(bindir)/$(ETAGS_EXEC) +DEST_READ_LIB = $(libdir)/$(READ_LIB) +DEST_READ_INC = $(incdir)/$(READ_INC) +DEST_CMAN = $(man1dir)/$(CMAN) +DEST_EMAN = $(man1dir)/$(EMAN) + +# +# primary rules +# +all: $(CTAGS_EXEC) $(READ_LIB) + +$(CTAGS_EXEC): $(OBJECTS) + $(CC) $(LDFLAGS) -o $@ $(OBJECTS) $(LIBS) + +dctags$(EXEEXT): debug.c $(SOURCES) $(HEADERS) + $(CC) -I. -I$(srcdir) $(DEFS) -DDEBUG -g $(LDFLAGS) -o $@ debug.c $(SOURCES) + +readtags$(EXEEXT): readtags.c readtags.h + $(CC) -DREADTAGS_MAIN -I. -I$(srcdir) $(DEFS) $(CFLAGS) $(LDFLAGS) -o $@ readtags.c + +ETYPEREF_OBJS = etyperef.o keyword.o routines.o strlist.o vstring.o +etyperef$(EXEEXT): $(ETYPEREF_OBJS) + $(CC) $(LDFLAGS) -o $@ $(ETYPEREF_OBJS) + +etyperef.o: eiffel.c + $(CC) -DTYPE_REFERENCE_TOOL -I. 
-I$(srcdir) $(DEFS) $(CFLAGS) -o $@ -c eiffel.c + +$(OBJECTS): $(HEADERS) config.h + +# +# generic install rules +# +install: @install_targets@ + +install-strip: install + +install-ctags: install-cbin install-cman +install-etags: install-ebin install-eman + +$(bindir) $(man1dir) $(libdir) $(incdir): + $(srcdir)/mkinstalldirs $@ + +FORCE: + +# +# install the executables +# +install-bin: install-cbin install-ebin install-lib +install-cbin: $(DEST_CTAGS) +install-ebin: $(DEST_ETAGS) +install-lib: $(DEST_READ_LIB) $(DEST_READ_INC) + +$(DEST_CTAGS): $(CTAGS_EXEC) $(bindir) FORCE + $(INSTALL_PROG) $(CTAGS_EXEC) $@ && chmod 755 $@ + +$(DEST_ETAGS): + - if [ -x $(DEST_CTAGS) ]; then \ + cd $(bindir) && $(SLINK) $(CTAGS_EXEC) $(ETAGS_EXEC); \ + fi + +# +# install the man pages +# +install-man: install-cman install-eman +install-cman: $(DEST_CMAN) +install-eman: $(DEST_EMAN) + +$(DEST_CMAN): $(man1dir) $(MANPAGE) FORCE + - $(INSTALL_DATA) $(srcdir)/$(MANPAGE) $@ && chmod 644 $@ + +$(DEST_EMAN): + - if [ -f $(DEST_CMAN) ]; then \ + cd $(man1dir) && $(SLINK) $(CMAN) $(EMAN); \ + fi + +# +# install the library +# +$(DEST_READ_LIB): $(READ_LIB) $(libdir) FORCE + $(INSTALL_PROG) $(READ_LIB) $@ && chmod 644 $@ + +$(DEST_READ_INC): $(READ_INC) $(incdir) FORCE + $(INSTALL_PROG) $(READ_INC) $@ && chmod 644 $@ + + +# +# rules for uninstalling +# +uninstall: uninstall-bin uninstall-lib uninstall-man + +uninstall-bin: + - rm -f $(DEST_CTAGS) $(DEST_ETAGS) + +uninstall-lib: + - rm -f $(DEST_READ_LIB) $(DEST_READ_INC) + +uninstall-man: + - rm -f $(DEST_CMAN) $(DEST_EMAN) + +uninstall-ctags: + - rm -f $(DEST_CTAGS) $(DEST_CMAN) + +uninstall-etags: + - rm -f $(DEST_ETAGS) $(DEST_EMAN) + +# +# miscellaneous rules +# +tags: $(CTAGS_EXEC) + ./$(CTAGS_EXEC) $(srcdir)/* + +TAGS: $(CTAGS_EXEC) + ./$(CTAGS_EXEC) -e $(srcdir)/* + +clean: + rm -f $(OBJECTS) $(CTAGS_EXEC) tags TAGS $(READ_LIB) + rm -f dctags$(EXEEXT) readtags$(EXEEXT) + rm -f etyperef$(EXEEXT) etyperef.$(OBJEXT) + +mostlyclean: clean 
+ +distclean: clean + rm -f $(CONFIG_GEN) + +maintainerclean: distclean + rm -f $(AUTO_GEN) + +# +# implicit rules +# +.c.$(OBJEXT): + $(CC) -I. -I$(srcdir) $(DEFS) $(CFLAGS) -c $< + +# vi:set tabstop=8: diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..aeb8a07 --- /dev/null +++ b/NEWS @@ -0,0 +1,871 @@ +Current Version: 5.8 + +ctags-5.8 (09 Jul 2009) +* Removed ".ml" as a Lisp extension (now OCaml) [Lisp]. +* Added support for Ant language, contributed by David Fishburn. +* Added support for DOS Batch language, contributed by David Fishburn. +* Added support for Flex (Adobe) language, contributed by David Fishburn. +* Added support for MATLAB language, contributed by David Fishburn. +* Added support for Objective Camel (OCaml), provided by Vincent Berthoux [Patch #2738723]. +* Added support for TeX language, contributed by David Fishburn. +* Added support for VHDL language, contributed by Nicolas Vincent [Bug #1943306]. +* Added support for Pyrex/Cython declarations [Python]. +* Added support for "v" kind, for variables [Python]. +* Added support for class and member variables [PHP, Bug #1037086]. +* Added support for recent enhancements to Eiffel language [Eiffel]. +* Added support for ASP classes, contributed by Zendhi Nagao; changes meaning of 'c' kind flag [ASP]. +* Added regex support when compiling with MinGW. Gnu regex module now included in all distributions. +* Fixed detection of triple strings inside other strings [Python, Bug #1988130]. +* Fixed an endless loop with comments in triple strings [Python, Bug #1988027]. +* Fixed bug where functions were sometimes seen as methods [Python, Bug #1988026]. +* Fixed parsing of method parameter annotations, fix contributed by Paolo "blaisorblade" Giarrusso [Java, Bug #2049723, #2117073]. +* Fixed parsing of global scope qualifiers in base class lists [C++, Bug #1799343]. +* Fixed bug where namespace members were given kinds corresponding to globals [C++, Bug #1924919, #1575055]. 
+* Fixed parsing of "else" [C#, Bug #1830344]. +* Fixed parsing of derived enums [C#, Bug #1515910]. +* Fixed parsing of "foreach" [C#, Bug #1830343]. +* Fixed parsing of simple generic classes [C#, Bug #1515910]. +* Fixed bug with detecting identifiers inside variables [Python, Bug #1809024]. +* Fixed bug with detecting identifiers at the start of variables [Python, Bug #1856363]. +* Fixed parsing of triple single-quoted multi-line strings [Python, Bug #1906062]. +* Changed to newer version of autoconf, changing configure.in to configure.ac. + +ctags-5.7 (04 Sep 2007) +* Added support for DIM AS [Freebasic, Bug #1741778]. +* Added support for arbitrary nesting depth [Python, Bug #1684786, Debian bug #409078]. +* Added support for verbatim string literals [C#, Bug #1515910]. +* Added support for .ctags as well as ctags.cnf on Windows [Bug #1246506]. +* Added support for non-extern, non-static functions returning wchar_t, contributed by Aaron Peromsik [C++, Patch #1458930]. +* Added support for numerous revision control systems including Bazaar and Mercurial [Bug #1472894]. +* Added support for enums [Java, Bug #1730485, Bug #1517143, Patch #1027395, Patch #1528507]. +* Added support for multiple-level namespace declarations [C#]. +* Added .svn to list of directories ignored during recursion (--recurse). +* Added support for BlitzBasic, PureBasic and FreeBasic [FR #1100506]. +* Added support for interfaces and static/public/protected/private functions [PHP]. +* Added support for 'package' keyword [Perl]. +* Added support for multi-line subroutine, package, and constant definitions [Perl]. +* Added support for optional subroutine declarations [Perl]. +* Added support for formats [Perl]. +* Added support for new convert keyword [Eiffel]. +* Added optional tags for forward variable declarations (e.g. 'struct C;') [C, C++]. +* Changed parsing of option input file (-L) to strip trailing white space. +* Ignore comments mixed into definitions and declarations [Perl]. 
+* Fixed detecting labels with whitespace after label name [Perl, Bug #1752361] +* Fixed parsing of generic classes/interfaces [Java, Bug #1447756]. +* Fixed misidentification of fully qualified function calls as labels [Perl]. +* Fixed parsing of inner classes [Python, Bug #1411963]. +* Fixed line continuation [Python, Bug #928001, Patch #819471]. +* Fixed parsing of annotations [Java, Bug #1691412]. +* Fixed block-comment parsing [Verilog, Patch #1458042, Bugs #960316, #1111214, #1606569, #1615060]. +* Fixed typo in man page [Debian bug #366412]. +* Fixed missing chunk of text in man page and over-use of hyphens in UTF-8 locales [Debian bug #271323]. +* Fixed parsing of ` as a method name [Ruby]. +* Fixed parsing of keywords in string literals [Ruby, Bug #1742588]. +* Fixed potential segmentation violation [Bug #1672834, Bug #1222926]. +* Fixed parsing of destructors with whitespace after the '~' [C++, Bug #1585745]. +* Fixed default access of unions to be public [C++, Bug #1548443]. +* Fixed various memory leaks, mostly contributed by Dmitry Antipov. +* Fixed parsing of `define [Verilog, Bug #961001]. +* Fixed crashes involving '/' [Verilog, Bug #1743330]. +* Fixed compilation problem on MinGW [Bug #1517424]. +* Fixed generation of HTML-formatted man page [Bug #1645864]. +* Fixed recognition of Python scripts having '#!/usr/bin/python' as first line [Bug #1764148]. +* Fixed parsing of Fortran comment-to-end-of-line with no newline before EOF [Debian bug #432872]. +* Fixed parsing of << [C/C++, Bugs #1020715, #1093123, #1770479, #1770607]. +* Fixed parsing of fully-qualified type names [Java, Bug #814263]. +* Fixed handling of lone carriage-return characters in file [Bug #1773926]. + +ctags-5.6 (Mon May 29 2006) +* Reformatted code for independence of tab stop setting. +* Changed default configuration to disable installation of etags links. +* Changed --langmap to first unmap each supplied extension from other languages. 
+* Added support for ASP constants [ASP, Patch #961842]. +* Added support for GNU make extensions [Make]. +* Added .mk as extension recognized as a make language file [Make]. +* Added missing help for list-maps options [Bug #1201826]. +* Added new extension field "typeref" [thanks to Bram Moolenaar]. +* Extended functionality of Ruby parser with patch from Elliott Hughes [Ruby]. +* Fixed creation of TAGS file with etags-include but no files [Bug #941233]. +* Fixed problem reading last line of list file (-L) without final newline. +* Fixed infinite loop that could occur on files without final newline [C, Java]. +* Fixed incorrect tag for first field of table [SQL]. +* Fixed missing tags for functions beginning with underscore [Sh]. +* Fixed missing tags for functions with variable arg list [C, Bug #1201689]. +* Fixed parsing problem with parentheses in argument list [C, Bug #1085585]. +* Fixed problem in preprocessor directive handling [C, Bug #1086609]. + +ctags-5.5.4 (Thu Mar 25 2004) +* Fixed broken -R option. + +ctags-5.5.3 (Sun Mar 14 2004) +* Removed forgotten debug statement [Bug #811704]. +* Added support for Perl labels. +* Added support for Perl "use constant" [Perl, Patch #853704, Feature Request + #710017]. +* Added support for package qualification of tags, removing useless "package" + tag kind [Perl, Feature Request #448887]. +* Added support for "and" keyword [SML, Bug #816636]. +* Added support for variables [PHP]. +* Fixed problem destroying tag file with certain info options [Bug #845502]. +* Fixed portability problem [DJGPP]. +* Fixed problem of double characters in signature field [C, Bug #852368]. +* Fixed problem manifested by errant preprocessor conditionals [Bug #839162]. +* Fixed incorrect line address in tag file for SQL tags [SQL, Bug #823000]. +* Fixed incorrect recognition of POD paragraph [Perl, Bug #842077]. +* Fixed spurious tags for C++ member templates [C++, Bug #849591]. 
+* Fixed missing tags related to template specializations [C++, Bug #872494]. +* Fixed spurious local tags for statements following labels [C]. +* Fixed missing tags for certain scoped functions [Vim]. +* Fixed infinite loop in Fortran parser. +* Fixed missing tags for certain initializers [Fortran, Bug #877956]. +* Fixed problem with comment line after continuation character [Fortran, + Bug #858165]. + +ctags-5.5.2 (Wed Sep 17 2003) +* Added tags for local variables for C-based languages [C/C++/C#/Java/Vera, + Feature Request #449503]. +* Fixed compilation problem due to type change made to accommodate change of + return type of _findfirst() in VisualStudio.Net [Win32, Bug #775789]. +* Fixed problems with certain bit fields. + +ctags-5.5.1 (Wed Jul 30 2003) +* Changed supported tag kinds for Verilog parser during overhaul. +* Restored exit of program after --help, --license, and --version options [Bug + #717311, #751240]. +* Removed inclusion of general.h (GPL) from readtags.c (public domain). +* Added support for tags for labels [PL/SQL]. +* Added support for tags for constant definitions [PHP]. +* Fixed redundant parsing of configuration file [Windows, Bug #768814]. +* Fixed missing tags for definitions spanning lines [Verilog, Bug #762027]. +* Fixed compilation error for uncommon hosts. +* Fixed missing tags for Korn shell specific function definitions [Sh, + Bug #769184]. +* Fixed missing tags when semicolon separator appears [Fortran, Bug #734933]. +* Fixed missing tags when endsubroutine keyword appears [Fortran, Bug #726712]. +* Fixed problem with fixed-form line continuation following comment [Fortran, + Bug #726875]. +* Fixed missing tags for nested blocks [PL/SQL, Bug #722501]. +* Fixed missing tags for function typedefs [C]. +* Fixed inability to map empty extension when path contained dot [Bug #742689]. + +ctags-5.5 (Tue Apr 1 2003) +* Changed kind indicator for methods from 'f' to 'm' [Tcl]. 
+* Changed tags within interfaces to be disabled by default (like prototypes in + C/C++) [Fortran]. +* Removed explicit descriptions of individual --<LANG>-types options from + --help output. See new --list-languages and --list-kinds options. +* Removed explicit list of supported languages and supported tag kinds and + mapping patterns from man page. See new --list-languages, --list-kinds, and + --list-maps options. +* Renamed --<LANG>-types option to --<LANG>-kinds (still accepts old name). +* Added --list-kinds option. +* Added --list-maps option. +* Added --list-languages option. +* Added support for dimensioned variables, contributed by Simon Bohlin [ASP]. +* Added support for C# language. +* Added support for Erlang language, contributed by Brent Fulgham. +* Added support for HTML language files. +* Added support for JavaScript language files. +* Added support for SML (Standard ML) language, contributed by Venkatesh Prasad. +* Added mapping for .plx to Perl. +* Added tags for autocommand groups [Vim, Patch #664685]. +* Added support for numerous language extensions [Fortran]. +* Added '$', 'D', and 'd' in column 1 as comment characters [Fortran]. +* Added special handling of --options=NONE to disable automatic reading of + options from configuration files or environment. +* Added check for case-insensitive filenames to configure. +* Fixed problem with lower case tag [Vim, Bug #657327]. +* Fixed problem recognizing indented code [Vim, Patch #664685]. +* Fixed problem with infinite loop in certain comments [PL/SQL, Bug #629115]. +* Fixed problem of incorrect extension field [C, Bug #639639]. +* Fixed problem of empty scoping extension field [C, Bug #639644]. +* Fixed missing tags for functions split across lines [PHP, Bug #681824]. +* Fixed missing tags for nested subprograms using 'contains' [Fortran, + Bug #670433]. +* Fixed missing tags when variable has same name as keyword [Fortran]. +* Fixed spurious tag when an array-spec occurs within an entity-decl [Fortran]. 
+* Fixed mishandling of multiline raw strings [Python, Bug #699171]. +* Fixed missing scope extension field on namespaces [C++, C#, Bug #665086]. +* Fixed several bugs causing missed tags [Fortran]. +* Fixed problem with --langmap option preventing clearing of map [Bug #688442]. +* Fixed recognition of Unicode-8 characters [Java]. +* Fixed man page errors and omissions. +* Fixed bug in readFieldValue() in readtags library. +* Fixed bug in option parsing in readtags command-line program. +* Fixed portability problems with DJGPP [Bug #692569]. +* Fixed portability problems with Cygwin. + +ctags-5.4 (Thu Oct 17 2002) +* Improved ability for tagsOpen() in readtags library to report failure to + open tag file, adding new fields to tagFileInfo structure. +* Improved Cobol support to include data, files, groups, and sections [Cobol]. +* Added '$' as a valid character for C identifiers [VMS]. +* Added support for recording routine argument declarations for C-like + languages. See the --fields option and man page section TAG FILE FORMAT for + more information [C, C++, Java]. +* Added class and method support to TCL parser [TCL]. +* Added support for PL/SQL language. +* Added support for Vera language, inspired by Dave Eggum [Vera]. +* Fixed problem terminating Perl POD block [Perl, Bug #612621]. +* Fixed problem re whitespace preceding subprogram name [Pascal, Bug #612019]. +* Fixed problem with leading spaces before instruction [TCL, Bug #615928]. +* Fixed problem with double precision functions [Fortran, Bug #620288]. +* Fixed inverted test causing TMPDIR to be used for temporary files when + ctags is setuid instead of when not setuid [Bug #623713]. + +ctags-5.3.1 (Thu Sep 12 2002) +* Renamed tagsSetSorted() to tagsSetSortType() and "sorted" member of + tagFileInfo structure of readtags library to "sort". +* Added new function, tagsFirst(), to readtags library. +* Fixed incorrect tag kinds [Verilog]. +* Fixed null tags for unnamed BLOCK DATA statements [Fortran]. 
+* Fixed missing tags for function preceded by "<SID>" [Vim]. +* Fixed missing tags for equate statements not in column 1 [Asm, Bug #538629]. +* Fixed Ruby parser (why didn't the compiler report my screw-up?) [Ruby]. + +ctags-5.3 (Wed Jul 17 2002) +* Allowed --etags-include option without input files. +* Changed Asm parser back to C-based parser to remove redundant tags, + and extending its support for more variants [Asm]. +* Changed to using _tempnam() to create temporary files on Windows, allowing + "TMP" environment variable to set temporary directory. +* Changed the -x output to match that of traditional ctags when the --format=1 + option is supplied. The new format was also changed slightly to conform more + closely to the original format, with the addition of the extra tag type field. +* Added support for Verilog language, submitted by Nam SungHyun. +* Added support for RISC OS platform, contributed by Andrew Wingate. +* Added support for "#pragma weak", generating macro tags for weak symbols [C]. +* Added support for mixins and class methods to Ruby parser, submitted by + Matthias Veit [Ruby]. +* Added support to ctags and readtags library for case-folded sorting of tag + files, submitted by Flemming Madsen. +* Added identification of class methods [Python]. +* Fixed portability problems [Bugs #541997, #571240]. +* Fixed bug in configure script [Solaris, Bug #542966]. +* Fixed invalid package name tags [Perl, Bug #535068]. +* Fixed failure to output relative paths into etags TAGS files on Win32 + [Bug #568365]. +* Fixed incorrect line address in cases of line continuation [Fortran]. +* Fixed missing tags for certain cases of invalid syntax [C]. +* Fixed missing tags in Fortran with HPF extensions [Fortran, Bug #565813]. +* Fixed spurious tag for clients portion of feature clause when following + an empty feature clause [Eiffel]. + +ctags-5.2.3 (Sun Feb 24 2002) +* Fixed portability problem in makefile [Solaris, FreeBSD]. 
+* Fixed infinite loop for certain cases of invalid syntax [Eiffel]. +* Changed Asm parser to regex, extending its support for more variants [Asm]. + +ctags-5.2.2 (Sat Feb 16 2002) +* Fixed spurious tags following empty feature clause [Eiffel]. +* Fixed missing tags for classes specifying generic creation routine [Eiffel]. +* Fixed missing tags when label not followed by white space [YACC]. +* Fixed for portability [Solaris, MacOS X]. +* Added support for type reference tool [Eiffel]. + +ctags-5.2.1 (Sun Jan 20 2002) +* Portability fixes [Mingw32]. +* Added "RCS" and "CVS" to list of directories excluded by default. +* Fixed missing tags for function pointers declared const or volatile + [C, Bug #503764]. + +ctags-5.2 (Sun Dec 23 2001) +* Portability fixes [HP-UX, Solaris, VMS, OS/2]. +* Made code compilable by a C++ compiler. +* Changed reading of option files to ignore blank lines. +* Changed and enhanced interface to readtags library (see readtags.h). +* Changed from using addLanguageRegex() to addTagRegex() in regex-based + parsers. +* Added support for Lua language, submitted by Max Ischenko. +* Added instructions to man page on using tags with NEdit. +* Added setargv.obj to link for wildcard expansion [MSVC]. +* Added capability to have regex invoke a callback in a regex parser. +* Fixed regex tag problem which left newlines in back-references. +* Fixed missing class-qualified tags [Eiffel]. +* Fixed spurious tags for entries in final indexing clause [Eiffel]. +* Fixed problem with invalid filenames in preprocessor line directives. +* Fixed bug parsing scoped variables (e.g. "b:variable") [Vim, Bug #487608]. +* Fixed problem compiling readtags.c on some hosts. +* Fixed memory overwrite problem in readtags library. + +ctags-5.1 (Tue Nov 06 2001) +* Changed name of option configuration files for MSDOS, MSWindows, and OS/2. +* Changed regex support to enforce REG_NEWLINE. 
This fixes a problem where the + newline character was explicitly being matched by user patterns [Bug #431477]. +* Added new public domain library for reading tag files (see readtags.h). +* Added support for variables and namespaces, provided by Jay Glanville [Vim]. +* Added report of non-options in option configuration files and CTAGS + environment variable. +* Added support for YACC language, submitted by Nick Hibma [YACC]. +* Added support for Perl packages, submitted by Nick Hibma [Perl]. +* Added '$' as valid identifier character for DEC C compiler [VMS, Bug #425147]. +* Added compilation date and time to --version output. +* Added configure check for HP-UX to determine if ANSI options needed [HP-UX]. +* Removed tags for forward class/struct declarations [C/C++, Bug #432563]. +* Eliminated ;" separator from end of tag line when no extension fields are + present. +* Fixed segmentation violation for some Lisp files [Lisp]. +* Fixed segmentation violation occurring when file referenced in #line + directive was from an unknown language. +* Fixed loss of sync when parsing bit fields named with C++ reserved word [C]. +* Fixed compilation problem on gcc-2.7.2. +* Fixed problem parsing verbatim strings [Eiffel]. +* Fixed problem with PHP references [PHP]. +* Fixed handling of Perl __DATA__ sections [Perl]. +* Fixed problem resulting from white space in tag name due to regex name + specifier. +* Fixed double reading of $HOME/.ctags when current directory is $HOME. +* Fixed problem reading option configuration files using CR-LF newlines. +* Fixed problem preventing output control over tag kinds of regex patterns + [Bug #429869]. +* Fixed incorrect parsing of Vim functions with ':' modifiers [Bug #466517]. + +ctags-5.0.1 (Sun Apr 15 2001) +* Fixed problem checking recursive links [SunOS 4.x]. +* Improved security on hosts where mkstemp() is not available. + +ctags-5.0 (Sun Mar 18 2001) +* Restructured code to simplify support for new language parsers. 
+* Changed source code to use ANSI-style function definitions. +* Changed scope-qualified tag entries to omit enumeration name [C/C++]. +* Changed reading of files supplied to -I option to read one token per line. +* Changed reading of option files to read one argument per line. +* Changed default extension fields, now controlled by new option --fields. +* Changed detection of etags invocation to accept any name containing "etags". +* Removed -p option, which only caused confusion and is rendered obsolete by + the change immediately above. +* Removed 'A' flag to the --c-types, --eiffel-types, and --java-types + options, replacing its functionality with the new --fields option. +* Removed 'C' flag to the --c-types, --eiffel-types, and --java-types + options, replacing its functionality with the new --extra option. +* Deprecated -i option, which was long ago replaced with the --c-types option. +* Deprecated --file-tags option, now incorporated into new --extra option. +* Deprecated --kind-long option, now incorporated into new --fields option. +* Renamed --lang[uage] option to --language-force. +* Renamed makefiles for non-Unix platforms. +* Improved parsing of assembly language files [Asm]. +* Improved parsing of Fortran language files, adding new tag kinds [Fortran]. +* Added documentation explaining how to extend ctags with new parsers. +* Added support for regular expressions, using either Posix or Gnu interface. +* Added support for mapping file names to languages using shell patterns. +* Added support for ASP scripts, submitted by Patrick Dehne [ASP]. +* Added support for Makefiles [Make]. +* Added support for Pascal language [Pascal]. +* Added support for PHP scripts, submitted by Jesus Castagnetto [PHP]. +* Added support for REXX language [REXX], based on submission by Alexaner Mai. +* Added support for Ruby, submitted by Thaddeus Covert [Ruby]. +* Added support for S-Lang, submitted by Francesc Rocher [SLang]. 
+* Added support for Macintosh platform using MPW (by Maarten Hekkelman). +* Added .tk as recognized extension [Tcl]. +* Added .cp and .hp as C++ extensions [C++]. +* Added .zsh as shell script extension [Sh]. +* Added support for trigraphs for C-based languages [C/C++]. +* Added language recognition for shell scripts using "#!/usr/bin/env command". +* Added check for recursive directory links. +* Added support for "[" form of verbatim strings [Eiffel]. +* Added --exclude option to exclude directories while recursing. +* Added --fields option to specify extension fields to include in output. +* Added --extra option to allow control over extra tags. +* Added --regex-<LANG> option to define language-specific regular expressions. +* Added --<LANG>-types options for all supported languages. +* Added --langdef option to define new languages to be parsed with regex. +* Added --languages option to restrict set of languages scanned for tags. +* Added --tag-relative option to make file paths recorded in tag file relative + to location of tag file itself instead of the current working directory when + file arguments are specified using relative paths. +* Added restriction of permissions of created temporary files when mkstemp() + is not available for security. +* Reimplemented line directive handling to work for all languages. +* Fixed tag generation for packages [Java]. +* Fixed Lisp parser [Lisp]. +* Fixed Mingw32 port [Win32]. +* Fixed bug in procedure name parsing [Tcl]. +* Fixed bug resulting in wrong column being checked for paragraphs [Cobol]. +* Fixed bug in language dispatch for executable "#!" scripts [Unix]. +* Fixed bugs resulting in incorrect scope entries in tag file [C++/Java]. +* Fixed warning caused by reinstallation of etags link [Unix]. +* Fixed destruction of existing tag file when no files supplied on invocation. +* Fixed problem in Makefile.in which prevented configuring and building in + non-source directory.
Also changed Makefile.in to generate and use correct + object and executable file extensions when run on Win32. + +ctags-4.0.3 (Sun Jul 16 2000) +* Fixed compiler warnings [Amiga]. +* Fixed problem in configure.in causing struct stat st_ino member test to fail. +* Fixed problem with TAGS entries for files using DOS-style (CR-LF) new lines. +* Improved algorithm for locating Perl functions and skipping pods. +* Improved algorithm for locating shell functions [Sh]. +* Renamed Makefile.amiga to Makefile.manx [Amiga]. +* Added Makefile.sas for SAS C compiler [Amiga]. +* Updated Makefile.qdos [QDOS]. +* Improved support for DECC compiler [VAX]. + +ctags-4.0.2 (Mon Jul 10 2000) +* Now silently ignore -w option for backwards compatibility with SVR4 ctags. +* Fixed bug resulting in no extension flags when using --kind-long option. + +ctags-4.0.1 (Wed Jun 28 2000) +* Fixed segmentation violation when using --file-tags. + +ctags-4.0 (Thu Jun 22 2000) +* Fixed infinite loop on certain syntactically invalid class constructs [C++]. +* Fixed problem of incorrect tags for some pure virtual functions [C++]. +* Fixed inability to clear all tag types when using --c-types= (all languages). +* Fixed problem of arguments to parameterized class being reported as + ancestors in the "inherits" extension flag. +* Fixed missed tags for typedef-ed function pointers having a PROTO((a,b)) + style argument list. +* Fixed missing file tags for referenced files when using --line-directives + option [C/C++]. +* Fixed failure to recognize drive-qualified file name as a file name when + supplied as argument to -I option [Win32]. +* Fixed problem with missing comma in "inherits" extension flag [Java]. +* Fixed problem with incorrect or redundant parents listed for "inherits" + extension flag [Java]. +* Added check to avoid recursive symbolic links to directories. +* Added warning message for -i option, which is deprecated and being dropped. 
+* Added support for Assembler, COBOL, LISP, PERL, and Scheme, taken from Gnu + etags. +* Added support for AWK, Bourne/Korn/Z Shell, Python, TCL, and Vim scripts. +* Added support for the BETA language, submitted by Erik Corry. +* Added ability to determine language from interpreter specified in first line + of executable files if they are not recognized by their extension. +* Added --options option. +* Added ability to specify files having no extension with -h and --langmap + options. +* Added compile time option to separate path components with a unix-style path + separator for sharing tag file across platforms, enabled by defining the + label UNIX_PATH_SEPARATOR [Win32]. +* Fixed portability issues [VMS]. + +ctags-3.5.2 (Mon Apr 24 2000) +* Fixed problem preventing Emacs-style tags from being written to stdout. + +ctags-3.5.1 (Sun Apr 23 2000) +* Fixed infinite loop in writing Emacs-style TAGS file on platforms using + tmpnam() instead of mkstemp() [Win32]. +* Fixed minor problems in Borland makefiles [Win32]. +* Fixed compiler warning [DJGPP]. + +ctags-3.5 (Fri Apr 14 2000) +* Fixed core dump when including access field in tag file [Java]. +* Fixed failure to identify end of statement for block statements [Java]. +* Fixed bug with lone "end" in feature adaptation part of inheritance clause + [Eiffel]. +* Fixed problem preventing const functions from being recognized as pure + virtual [C/C++]. +* Fixed problem with no tags found after certain macro calls [C/C++]. +* Fixed bug in descrip.mms build file [VMS]. +* Changed to use mkstemp() (when available) to create temporary files for + security reasons and allow configuring default temporary directory, and to + override this directory at run-time by setting TMPDIR environment variable. +* Added support for extracting inheritance information into new "inherits" + extension flag [C++, Java]. +* Added Makefile.bc5 for Borland C++ version 5.5 compiler (free version). +* Added new question to FAQ regarding Xemacs. 
+* Updated FAQ regarding new release of NEdit. +* Renamed Borland 3.1 makefile from Makefile.bcc to Makefile.bc3. +* Renamed Microsoft Visual C++ makefile from Makefile.w32 to Makefile.mvc. + +ctags-3.4 (Thu Jan 13 2000) +* Fixed sorting problems when LC_ALL environment variable was set to foreign + locale (not fixed by previous release). +* Fixed nested scoping reported in extension flags and class-qualified tags. +* Eliminated generation of class-qualified tag entries when --c-types=+C + option is in effect but scope is empty (e.g. "::main"). +* Added support for default access of class members in Java. +* Added new extension flag "implementation", which indicates if a routine or + class is virtual or abstract. +* Minor changes for OS/2 compilation. + +ctags-3.3.3 (Thu Dec 16 1999) +* Changed how input is read for -L and --filter options to permit file names + containing spaces (see man page). +* Fixed scope recorded for C++ class elements, especially in namespaces. +* Fixed spurious tag generated for MODULE PROCEDURE in interfaces [Fortran]. +* Fixed sorting problems when LC_ALL environment variable was set to foreign + locale. +* Fixed crash on Windows when compiled with Mingw32 gcc compiler. +* Fixed compilation problems on Cray. + +ctags-3.3.2 (Fri Sep 24 1999) +* Fixed compile problem on AIX 4.1. +* Improved recovery from syntax error [Fortran]. +* Changed name of configure option (now --enable-custom-config). +* Changed Makefile.bcc to optimize for space, since code size exceeded 64KB. + +ctags-3.3.1 (Mon Sep 20 1999) +* Fixed segmentation violation occurring when directory recursion was selected. +* Fixed misleading message when out of memory during internal sort. + +ctags-3.3 (Fri Sep 17 1999) +* Fixed missing class-qualified tags [Java]. +* Fixed missing tag for functions having function pointer argument [C]. +* Fixed parsing of conversion functions [C++]. 
+* Added missing space following "operator" keyword to the tag names generated + for function call operators [C++]. +* Fixed string parsing to retry file as free source form upon EOF [Fortran]. +* Fixed missing tags following comments [Fortran]. +* Fixed missing labels for free source form [Fortran]. +* Removed 72 character limit for fixed form source lines, since many compilers + relax this limit and it is commonly taken advantage of. This was sometimes + causing fixed form source to be parsed as free form source [Fortran]. +* Changed misleading message when file could not be accessed. +* Changed behavior of --verbose option to display option processing. +* Changed -I option to permit clearing the token list with "-I-". +* Changed --lang option to accept new "auto" parameter. +* Changed --langmap option to accept new "default" parameter. +* Changed --eiffel-types option to accept new 'C' flag to generate + class-qualified tags. +* Changed -h option to accept new "default" parameter. +* Changed option processing. Most options may now appear anywhere on the + command line, affecting only those files which follow them. +* Added ability to specify default options in any of the files /etc/ctags.conf, + /usr/local/etc/ctags.conf, $HOME/.ctags, .ctags, and one optional file, + which can be supplied at configure time. +* Added --filter option. +* Added --filter-terminator option. + +ctags-3.2.4 (Thu Jul 01 1999) +* Changed name of macro in Makefile.in to avoid being overridden by CTAGS + environment variable. + +ctags-3.2.3 (Mon Jun 21 1999) +* Small portability change for EMX compiler on OS/2. +* Slight change to W32 and BCC makefiles. + +ctags-3.2.2 (Sat May 29 1999) +* Fixed endless error loop in the case of unreadable file. +* Fixed redundant include entries in TAGS file when using --etags-include. + +ctags-3.2.1 (Wed May 09 1999) +* Fixed problem reading -I token list from file. +* Fixed problem with "using" declarations which corrupted tag file [C++].
+* Fixed configure.in to more reliably recognize existing prototypes. +* Added ability to ignore preprocessor directives in Fortran files. +* Added support for egcs/MingW32 compiler [Win32]. + +ctags-3.2 (Wed Mar 03 1999) +* Fixed spurious tags related to export specifiers of feature clauses [Eiffel]. +* Fixed problem with template in ctor-initializer [C++]. +* Fixed typo causing compiler error [MSVC]. +* Extended -I option to allow token replacement [thanks to Flemming Madsen]. +* Added --etags-include option to support TAGS file includes. +* Added support for QDOS [thanks to Thierry Godefroy]. + +ctags-3.1.2 (Tue Jan 26 1999) +* Changed extension flags to eliminate space between label and value to remain + true to the intent of the agreement on the extended format made with editor + authors. +* Added --links option to permit ignoring symbolic links. +* Fixed missing tags upon ANSI style variable function argument lists. +* Fixed missing tags for methods with fully qualified type names in argument + list [Java]. +* Fixed double tags generated for enumerators followed by comma. +* Fixed missing path prefix for -p option [Win 95/NT]. + +ctags-3.1 (Wed Jan 20 1999) +* Changed -h and -langmap options to accept a plus sign as the first character + of their arguments to indicate that arguments should be added to current. +* Changed default for member tags to 'on' [C/C++]. +* Changed default for local entities to 'off' [Eiffel]. +* Added tags for forward class/struct/union/enum declarations when using + -c-types=+x [C/C++]. +* Fixed memory overwrite bug causing general protection fault [Win 95/NT]. +* Fixed missing tags for methods with throws clause [Java]. +* Fixed bad tags generated for null macro names [C]. +* Fixed spurious tag for features and entities of BIT type [Eiffel]. +* Fixed spurious tags when local entity declaration list was empty [Eiffel]. +* Fixed missing tags for constructors and destructors [C++].
+* Fixed failure to recognize function when declaration for first argument + was of template type [C++]. + +ctags-3.0.3 (Mon Dec 21 1998) +* Fixed mistake made in previous version which caused macro tags to be missed. +* Fixed parsing of --langmap option. + +ctags-3.0.2 (Mon Dec 21 1998) +* Added tags for names undefined with #undef [C/C++]. +* Added tags for renamed features (Eiffel). +* Improved Emacs-style tag file contents (per Ian Zimmerman). +* Fixed problem handling deferred, external, once, obsolete features in Eiffel. +* Fixed porting problem [OSF1 V4.0]. + +ctags-3.0.1 (Sat Dec 12 1998) +* Fixed problem with certain macros and functions with no declared return type. +* Fixed problem causing endless loop on MSDOS/Win32 by restoring use of binary + mode on opening of source files. +* Fixed porting problems [SunOS 4.1.x and MSVC++ 5.0]. + +ctags-3.0 (Sun Dec 06 1998) +* Added support for the Eiffel language (everyone should learn Eiffel). +* Added support for the Fortran language. +* Added --c-types option to specify tag types to be included for C/C++. +* Added --eiffel-types option to specify tag types to be included for Eiffel. +* Added --fortran-types option to specify tag types to be included for Fortran. +* Added --file-scope option to place verbose tag description into tag file. +* Added --file-tags option to place tags for source file names into tag file. +* Added --java-types option to specify tag types to be included for Java. +* Added --kind-long option to place verbose tag description into tag file. +* Added --linedirectives option to enable processing of #line directives so + that running ctags on preprocessor output can generate line numbers and file + names which correspond to the original source files. +* Added -V option to enable verbose message for each file considered. +* Added special handling for macros of form "INIT(= value)". 
+* Added ability to suffix an ignored identifier (-I option) with the '+' + character, thus instructing ctags to also ignore any argument list which + may follow the identifier. +* Changed the -i option, moving Java language options to the new --java-types + option. The -i option is now deprecated in favor of the new language + specific tag type options. +* Changed behavior of handling of ignored identifiers (-I option) to still + generate a tag for any macro definition for that identifier. +* Changed handling of -h option so that include files are no longer assumed to + be C++ files. +* Changed tags for operators to always precede the operator with the string + "operator ", thus making it consistent for all operators. [C++] +* Changed C/C++ parsing, catching many more tricky constructs. +* Changed extension flags to place a space between the label and the value for + readability. +* Fixed core dump which occurred when using -iF (now --file-tags) together + with -e (etags) on a zero-length file. +* Fixed missing or incorrect tags for conversions operators or operator "()". +* Fixed incorrect parent class in extension flags for type declarations of the + form "class Bar { OtherClass::sometype foo; }". +* Fixed missing tags for "friend" and "static" prototypes in header files. +* Fixed problem of external "sort" reporting locale not available on HPUX. +* Fixed -p option. +* Fixed VMS support. It should now work for any source file type. + +ctags-2.3.2 (Wed Sep 09 1998) +* Fixed -h option; broken since version 1.7, yet only just reported. + +ctags-2.3.1 (Sun Aug 30 1998) +* Fixed improper handling of derived structs. +* Fixed wrong class name tag when a nested-name-specifier was present in class + declaration. +* Added parent information into tag extension flags for data structures to + match that already present for members. +* Add missing documentation for --langmap option in the --help output. +* Eliminated compiler warning [gcc 2.8.1]. 
+ +ctags-2.3 (Thu Aug 20 1998) +* Eliminated compiler warnings [SGI MIPSpro]. + +ctags-2.2.7 (Mon Aug 17 1998) +* Fixed porting problem [Borland C++]. + +ctags-2.2.6 (Wed Aug 12 1998) +* Fixed core dump encountered on some platforms when the CTAGS environment + variable was set but empty. +* Fixed porting problem [MSVC]. +* Added directory recursion support for Amiga. + +ctags-2.2.3 (Sun Aug 02 1998) +ctags-2.2.2 (Fri Jul 24 1998) +* Fixed porting problems [AIX, HP-UX, OSF/1, SunOS, MSVC]. + +ctags-2.2.1 (Fri Jul 24 1998) +* Now uses a default directory name of "." when using -R or --recurse option + (e.g. "ctags -R" is equivalent to "ctags -R ."). +* Directories named "SCCS" are skipped when using the -R or --recurse option + under Unix. +* Fixed porting problems [HP-UX, IRIX, SunOS, MSDOS/Windows]. + +ctags-2.2 (Mon Jul 20 1998) +* Added the --recurse and -R options to allow recursing into directories. + This allows running ctags on an entire source directory tree using the + single command "ctags -R ". Currently, this option is only supported on + UNIX, MSDOS, Windows 95/NT, and OS/2. Other platforms will have to wait. +* Changed writing of Emacs-style TAGS file to binary mode [MSDOS/Windows]. +* Fixed porting problems [HP-UX, OSF/1]. + +ctags-2.1.1 (Mon Jul 06 1998) +* Changed -h option to allow only periods to separate extensions. +* Added the --langmap option to allow overriding the default associations + between source language and file extension. +* Added configuration check and code work-around for putenv() prototypes + missing the const from the argument declaration [IRIX 5.2 and CRAY J90]. +* Added makefile for VMS. +* Fixed porting problem [HP-UX]. + +ctags-2.1 (Wed Jul 01 1998) +* Added Java support. +* Eliminated the --keywords option introduced in the 2.0.4, replacing it with + automatic detection of the language (i.e. recognized keywords) based upon + the file extension. 
+* Added the --lang option for manually overriding the automatic selection of + the language. +* Added new flag 'i' to the -i option to allow generating tags for Java + interfaces. +* Added new flag 'n' to the -i option to allow generating tags for C++ + namespaces. +* Added new flag 'x' to the -i option to allow generating tags for extern + variable declarations. +* Added new extension flags, "private", "protected", and "public", which + indicate the visibility of class members when it can be determined. +* Changed behavior of flag 'C' of the -i option to add tags of form + "class.member" for Java. +* Changed how files on command line are handled. Ctags will now only scan + those files whose extensions it knows about unless the --lang option is + specified. This allows running ctags on all files in a directory without + having to be specific (e.g. "ctags *"). +* Removed support for duplicate tag warnings and the -w and -W options. These + options are silently ignored for now. + +ctags-2.0.4 (Sat May 23 1998) +* Added sorting time to the output of the --totals option. +* Added the --keywords option to allow restricting the recognized + declaration keywords in order to handle legacy source code which uses + newer keywords for variable and parameter names. +* Ignore list now also applies to macro tags. +* /dev/stdout now properly handled as parameter to -f/-o option. +* Fixed problem handling an operator definition in C++ when white space + appeared between the "operator" keyword and the operator (e.g. "="). +* Fixed handling of non-symbolic operators (e.g. "new", "delete", etc.). +* Fixed sort order problem for some locale settings. +* Fixed segmentation violation when using ignore list (-I) on SunOS 4.x. +* Fixed a segmentation violation caused by a stack overwrite when testing a + particular kind of non-standard tag file format. + +ctags-2.0.3 (Sun Mar 12 1998) +* Added configure check for "strip" program. 
+* Added new flag 'C' to the -i option to allow adding extra tags to the tag + file in the form "class::member" for class methods and members. + +ctags-2.0.2 (Wed Feb 25 1998) +* Added stripping of installed binary for "install" target. + +ctags-2.0.1 (Thu Feb 19 1998) +* Added support for C++. +* Added new flag 'F' to the -i option to allow adding an extra tag for the + basename of each source file supplied to ctags. This provides the ability to + jump to a source file in Vi using ":tag file.c". +* Added new flag 'm' to generate tags for class, structure, and union members + (disabled by default). +* Added several new flags to the -i option to allow finer specification of + which types of tags to include/exclude. +* Added ".hh" extension to the default list of files recognized as header + files. +* Added explicit handling of special gcc construct __attribute((..)), + which could lead to incorrect tag generation. +* Added configure option --disable-extended-format to allow building ctags + with the extended format disabled by default. +* Added configure option --enable-macro-patterns to change the default + behavior of ctags to generate patterns instead of line numbers for macro + (define) tags. +* Changed configure option --enable-internal-sort to --disable-external-sort. +* Changed makefile for OS/2. +* Removed support for the -d, -t and -T options which had been deprecated + for some time. +* Removed ANNOUNCE file in distribution, consolidating it with the README + file. +* Replaced CHANGES file with NEWS for more GNU-like standard distribution. +* Improved the detection of macros of the type generated by Microsoft Visual C + when generating source code. These caused subsequent statements to fail to + have tags generated for them. Still not bullet proof, though. +* Fixed a problem which prevented the use of / as a path separator under MSDOS + and Win 95/NT. +* Fixed problem of blank lines occurring in the tag file.
+* Fixed recognition of declarations with parentheses. +* Fixed problem of missing tags for objects within extern "C" blocks. +* Fixed problem in source file counts when using --totals option. +* Extended the length of tag type field in -x output to be more verbose. +* Fixed option initialization error which caused static tags to be excluded. + +ctags-1.7 (Mon Oct 13 1997) +* Tag files now use a new extended format which is backwards compatible with + existing Vi implementations, yet provides extended information which can be + used by supporting editors. +* Added documentation pseudo-tags (e.g. !_TAG_FILE_FORMAT) to tag file. +* Added the --excmd option as alternative to the -n and -N options. +* Added the --format option to allow forcing the old-style tag file format. +* Added the --if0 to control how "#if 0" branches are handled. +* Added the --sort option as alternative to -u option. +* Added the --totals option to print statistics of tag generation. +* Added the --version option. +* Improved handling of preprocessor conditionals. +* Code within an #if 0 is now never scanned for non-macro tags by default + since tags within that code could conceivably be overridden by more + desirable tags in the #else branch to follow. Macro tags in these branches + are always included. +* Fixed problem which could leave invalid tag entries in tag file when an + internal re-scan of a source file occurred. +* Fixed problem with internal sort mechanism when appending tags to existing + file. +* Changed external sort command to filter out duplicate identical tags lines + (including the pattern) unless warnings for duplicate tags are enabled. +* Added data to emacs style tag entries to more closely match that produced by + the GNU etags. +* Removed fixed limits on maximum tag line length. +* Correction to Amiga makefile. +* Improvements to Win32 portability with changes to Makefile.w32. 
+ +ctags-1.6 (Tue May 13 1997) +* Now using GNU autoconf to permit automatic host configuration. +* Added the -e option to generate Emacs style tag files +* Added ETAGS environment variable to be used when -e option is in effect. +* Added the -p option to supply a default path for source files. +* Fixed problem of incorrect line numbers in tag file when form feeds and + vertical tabs occurred in the source file. +* Fixed problem preventing ignoring of identifiers specified via the -I option + when the identifiers were followed by a parameter list in the source code. +* Changed the search patterns generated for #define macros when using -N + option. It now includes the character following the name (or "$" if end of + line) instead of the "\>" used previously. Some editors did not support this + metacharacter for tag searches. +* Removed -u (unique) flag from sort command line invocation to retain exactly + identical tag patterns in the same file. This is in preparation for editor + support of duplicate tags. +* Fixed problem resulting in no tags generated following a function-like + macro reference outside of a function block. +* Fixed problem of no tags generated for typedef for function or function + pointer. +* Fixed problem of no tags generated after encountering strange function + declarations (e.g. "foo(params) saywhat (void);") + +ctags-1.5 (Sat Oct 5 1996) +* Added generation of tags for objects inside C++ extern blocks (e.g. + 'extern "C" {...}' construct). +* Added generation of tags for function definitions inside brace-enclosed + ("{}") blocks, since function definitions are allowed inside classes and + extern blocks. +* Added the -N option to force patterns to be used for all tags (including + macro tags). +* Changed the search patterns generated for macro definitions to be terminated + with "\>", which allows the pattern to still match even when the #define + line beyond the macro name is changed.
+* Fixed problem resulting in no tags generated for files containing a + function-like macro, such as 'MODULE_ID("$Id")', even when ignoring the + keyword. This also fixed a problem that caused tags to be missed for + initialized function pointer definitions. +* Redirected error messages to stdout for MSDOS version. + +ctags-1.4 (Sun Aug 18 1996) +* Added recursive parsing of class/struct/enum blocks to look for + class/struct/enum tags and enumeration values. +* Added the -I option to specify keywords to ignore in the source files. +* Fixed problem resulting in no tag generated when declaring a pointer const + or volatile. +* Fixed problem resulting in no tag generated for comma terminated function + declarations. + +ctags-1.3 (Sun Jun 16 1996) +* Fixed problem which caused "struct tag;" to be misinterpreted as a + variable definition. +* Added the -n option to use line numbers in the tag file instead of patterns. +* Added the -? option as an alternative for the --help option. + +ctags-1.2 (Wed Jun 5 1996) +* Fixed a problem caused by an uninitialized variable. + +ctags-1.1 (Tue Jun 4 1996) +* Fixed problem reading parameter list to the -h option. + +ctags-1.0a (Mon Jun 3 1996) +* Added ctags.lsm to distribution + +ctags-1.0 (Fri May 31 1996) +* First public release. + +vim:tw=78 diff --git a/README b/README new file mode 100644 index 0000000..f9554f3 --- /dev/null +++ b/README @@ -0,0 +1,73 @@ +Exuberant Ctags +=============== +Author: Darren Hiebert + http://ctags.sourceforge.net + Instant Messaging: + Yahoo! ID : dbhiebert + AIM ScreenName: darrenhiebert + +Exuberant Ctags is a multilanguage reimplementation of the much-underused +ctags(1) program and is intended to be the mother of all ctags programs. It +generates indexes of source code definitions which are used by a number of +editors and tools.
The motivation which drove the development of Exuberant +Ctags was the need for a ctags program which supported generation of tags +for all possible C language constructs (which no other ctags offers), and +because most were easily fooled by a number of preprocessor constructs. + + +Exuberant Ctags offers the following features: + +1. It supports the following languages: Assembler, AWK, ASP, BETA, + Bourne/Korn/Z Shell, C, C++, C#, COBOL, Eiffel, Erlang, Fortran, Java, Lisp, + Lua, Makefile, Pascal, Perl, PHP, PL/SQL, Python, REXX, Ruby, Scheme, + S-Lang, SML (Standard ML), Tcl, Vera, Verilog, VHDL, Vim, and YACC. + +2. It is capable of generating tags for virtually all C language constructs. + +3. It is very robust in parsing code. In particular, the C/C++ parser is + far less easily fooled by code containing #if preprocessor conditional + constructs, using a conditional path selection algorithm to resolve + complicated situations, and a fall-back algorithm when this one fails. + +4. Supports output of Emacs-style TAGS files (i.e. "etags"). + +5. User-defined languages, using Posix regular expressions. + +6. Supports UNIX, MSDOS, Windows 95/98/NT/2000/XP, OS/2, QNX, Amiga, QDOS, + RISC OS, VMS, Macintosh, and Cray. Some pre-compiled binaries are + available on the web site. + + +Visit the Exuberant Ctags web site: + + http://ctags.sourceforge.net + + +Which brings us to the most obvious question: + + Q: Why is it called "Exuberant" ctags? + A: Because one of the meanings of the word is: + + exuberant : produced in extreme abundance : PLENTIFUL syn see PROFUSE + +Compare the tag file produced by Exuberant Ctags with that produced by any +other ctags and you will see how appropriate the name is. + + +This source code is distributed according to the terms of the GNU General +Public License. It is provided on an as-is basis and no responsibility is +accepted for its failure to perform as expected. It is worth at least as +much as you paid for it!
+ +Exuberant Ctags was originally derived from and inspired by the ctags +program by Steve Kirkendall (kirkenda@cs.pdx.edu) that comes with the Elvis +vi clone (though almost none of the original code remains). This, too, is +freely available. + +Please report any problems you find. The two problems I expect to be most +likely are either a tag which you expected but is missing, or a tag created +in error (shouldn't really be a tag). Please include a sample of code (the +definition) for the object which misbehaves. + +-- +vim:tw=76:sw=4:et: diff --git a/ant.c b/ant.c new file mode 100644 index 0000000..eedfcec --- /dev/null +++ b/ant.c @@ -0,0 +1,42 @@ +/* +* $Id$ +* +* Copyright (c) 2008, David Fishburn +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Ant language files. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include +#include "parse.h" + +/* +* FUNCTION DEFINITIONS +*/ + +static void installAntRegex (const langType language) +{ + addTagRegex (language, + "^[ \t]*<[ \t]*project.*name=\"([^\"]+)\".*", "\\1", "p,project,projects", NULL); + addTagRegex (language, + "^[ \t]*<[ \t]*target.*name=\"([^\"]+)\".*", "\\1", "t,target,targets", NULL); +} + +extern parserDefinition* AntParser () +{ + static const char *const extensions [] = { "build.xml", NULL }; + parserDefinition* const def = parserNew ("Ant"); + def->extensions = extensions; + def->initialize = installAntRegex; + def->regex = TRUE; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/argproc.c b/argproc.c new file mode 100644 index 0000000..e06182f --- /dev/null +++ b/argproc.c @@ -0,0 +1,505 @@ +/* +* $Id: argproc.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1989, Mark Pizzolato (mark@infopiz.uucp) +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* Provided by Stephen P. Wall +* Extracted from the VMS port of GNU patch-2.1. +* +* This module provides redirection support for the VAX DECC port of +* Exuberant Ctags. +*/ +/* + * @(#)argproc.c 1.0 89/02/01 Mark Pizzolato (mark@infopiz.uucp) + */ + +#ifndef lint +char argproc_version [] = "@(#)argproc.c VMS uucp Version infopiz-1.0"; +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* System Information Definitions */ + +#define EXIT_OK 1 /* image exit code */ +#define EXIT_ERR 0x10000000 /* image exit code */ + +/* + * getredirection() is intended to aid in porting C programs + * to VMS (Vax-11 C) which does not support '>' and '<' + * I/O redirection, along with a command line pipe mechanism + * using the '|' AND background command execution '&'. + * The piping mechanism will probably work with almost any 'filter' type + * of program. With suitable modification, it may useful for other + * portability problems as well. + * + * Author: Mark Pizzolato mark@infopiz.UUCP + * Mods: Steve Wall Don't return a full path unless the + * original filename included a path. + */ +struct list_item + { + struct list_item *next; + char *value; + }; + +static expand_wild_cards (); +static char *pipe_and_fork (); + +int +getredirection (ac, av) +int *ac; +char ***av; +/* + * Process vms redirection arg's. Exit if any error is seen. + * If getredirection() processes an argument, it is erased + * from the vector. getredirection () returns a new argc and argv value. + * In the event that a background command is requested (by a trailing "&"), + * this routine creates a background subprocess, and simply exits the program. + * + * Warning: do not try to simplify the code for vms. The code + * presupposes that getredirection() is called before any data is + * read from stdin or written to stdout. 
+ * + * Normal usage is as follows: + * + * main (argc, argv) + * int argc; + * char *argv []; + * { + * getredirection (&argc, &argv); + * } + */ +{ + int argc = *ac; /* Argument Count */ + char **argv = *av; /* Argument Vector */ + char *ap; /* Argument pointer */ + int j; /* argv [] index */ + extern int errno; /* Last vms i/o error */ + int item_count = 0; /* Count of Items in List */ + struct list_item *list_head = 0; /* First Item in List */ + struct list_item *list_tail; /* Last Item in List */ + char *in = NULL; /* Input File Name */ + char *out = NULL; /* Output File Name */ + char *outmode = "w"; /* Mode to Open Output File */ + int cmargc = 0; /* Piped Command Arg Count */ + char **cmargv = NULL;/* Piped Command Arg Vector */ + + /* + * First handle the case where the last thing on the line ends with + * a '&'. This indicates the desire for the command to be run in a + * subprocess, so we satisfy that desire. + */ + { + extern background_process (); + ap = argv [argc-1]; + if (0 == strcmp ("&", ap)) + exit (background_process (--argc, argv)); + if ('&' == ap [strlen (ap)-1]) + { + ap [strlen (ap)-1] = '\0'; + exit (background_process (argc, argv)); + } + } + /* + * Now we handle the general redirection cases that involve '>', '>>', + * '<', and pipes '|'. 
+ */ + for (j = 0; j < argc; ++j) + { + if (0 == strcmp ("<", argv [j])) + { + if (j+1 >= argc) + { + errno = EINVAL; + perror ("No input file"); + exit (EXIT_ERR); + } + in = argv [++j]; + continue; + } + if ('<' == *(ap = argv [j])) + { + in = 1 + ap; + continue; + } + if (0 == strcmp (">", ap)) + { + if (j+1 >= argc) + { + errno = EINVAL; + perror ("No output file"); + exit (EXIT_ERR); + } + out = argv [++j]; + continue; + } + if ('>' == *ap) + { + if ('>' == ap [1]) + { + outmode = "a"; + if ('\0' == ap [2]) + out = argv [++j]; + else + out = 2 + ap; + } + else + out = 1 + ap; + continue; + } + if (0 == strcmp ("|", argv [j])) + { + if (j+1 >= argc) + { + errno = EPIPE; + perror ("No command to Pipe to"); + exit (EXIT_ERR); + } + cmargc = argc- (j+1); + cmargv = &argv [j+1]; + argc = j; + continue; + } + if ('|' == *(ap = argv [j])) + { + ++argv [j]; + cmargc = argc-j; + cmargv = &argv [j]; + argc = j; + continue; + } + expand_wild_cards (ap, &list_head, &list_tail, &item_count); + } + /* + * Allocate and fill in the new argument vector, Some Unix's terminate + * the list with an extra null pointer. 
+ */ + argv = *av = calloc (item_count+1, sizeof (char *)); + for (j = 0; j < item_count; ++j, list_head = list_head->next) + argv [j] = list_head->value; + *ac = item_count; + if (cmargv != NULL) + { + char subcmd [1024]; + + if (out != NULL) + { + errno = EINVAL; + perror ("Invalid '|' and '>' specified"); + exit (EXIT_ERR); + } + strcpy (subcmd, cmargv [0]); + for (j = 1; j < cmargc; ++j) + { + strcat (subcmd, " \""); + strcat (subcmd, cmargv [j]); + strcat (subcmd, "\""); + } + out = pipe_and_fork (subcmd); + } + if ((in != NULL) && (NULL == freopen (in, "r", stdin, "mbc=32", "mbf=2"))) + { + perror (in); /* Can't find file */ + exit (EXIT_ERR); /* Is a fatal error */ + } + if ((out != NULL) && (NULL == freopen (out, outmode, stdout, "mbc=32", "mbf=2"))) + { + perror (ap); /* Error, can't write or append */ + exit (EXIT_ERR); /* Is a fatal error */ + } +#ifdef DEBUG + fprintf (stderr, "Arglist:\n"); + for (j = 0; j < *ac; ++j) + fprintf (stderr, "argv[%d] = '%s'\n", j, argv [j]); +#endif + return 0; +} + +static add_item (head, tail, value, count) +struct list_item **head; +struct list_item **tail; +char *value; +int *count; +{ + if (*head == 0) + { + if (NULL == (*head = calloc (1, sizeof (**head)))) + { + errno = ENOMEM; + perror (""); + exit (EXIT_ERR); + } + *tail = *head; + } + else + if (NULL == ((*tail)->next = calloc (1, sizeof (**head)))) + { + errno = ENOMEM; + perror (""); + exit (EXIT_ERR); + } + else + *tail = (*tail)->next; + (*tail)->value = value; + ++ (*count); +} + +static expand_wild_cards (item, head, tail, count) +char *item; +struct list_item **head; +struct list_item **tail; +int *count; +{ +int expcount = 0; +int context = 0; +int status; +int status_value; +char *had_version; +int had_path; +$DESCRIPTOR (filespec, item); +/*$DESCRIPTOR (defaultspec, "SYS$DISK:[]*.*;");*/ +$DESCRIPTOR (defaultspec, ""); +$DESCRIPTOR (resultspec, ""); + + if (strcspn (item, "*%") == strlen (item)) + { + add_item (head, tail, item, count); + return; + } + 
resultspec.dsc$b_dtype = DSC$K_DTYPE_T; + resultspec.dsc$b_class = DSC$K_CLASS_D; + resultspec.dsc$a_pointer = NULL; + filespec.dsc$w_length = strlen (item); + /* + * Only return version specs, if the caller specified a version + */ + had_version = strchr (item, ';'); + /* + * Only return full path if the caller specified a path + */ + had_path = (strchr (item, ']') || strchr (item, ':')); + while (1 == (1&lib$find_file (&filespec, &resultspec, &context, + &defaultspec, 0, &status_value, &0))) + { + char *string; + char *c; + + if (NULL == (string = calloc (1, resultspec.dsc$w_length+1))) + { + errno = ENOMEM; + perror (""); + exit (EXIT_ERR); + } + strncpy (string, resultspec.dsc$a_pointer, resultspec.dsc$w_length); + string [resultspec.dsc$w_length] = '\0'; + if (NULL == had_version) + *((char *) strrchr (string, ';')) = '\0'; + if (!had_path) { + char *s = strrchr (string, ']'); + if ( s == NULL ) s = strrchr (string, ':'); + if ( s != NULL ) strcpy (string, s+1); + } + /* + * Be consistent with what the C RTL has already done to the rest of + * the argv items and lowercase all of these names. + */ + for (c = string; *c; ++c) + if (isupper (*c)) + *c = tolower (*c); + add_item (head, tail, string, count); + ++expcount; + } + if (expcount == 0) + add_item (head, tail, item, count); + lib$sfree1_dd (&resultspec); + lib$find_file_end (&context); +} + +static int child_st [2]; /* Event Flag set when child process completes */ + +static short child_chan;/* I/O Channel for Pipe Mailbox */ + +static exit_handler (status) +int *status; +{ +short iosb [4]; + + if (0 == child_st [0]) + { +#ifdef DEBUG + fprintf (stderr, "Waiting for Child Process to Finnish . . 
.\n"); +#endif + sys$qiow (0, child_chan, IO$_WRITEOF, iosb, 0, 0, 0, 0, 0, 0, 0, 0); + sys$dassgn (child_chan); + fclose (stdout); + sys$synch (0, child_st); + } +} + + +static sig_child (chan) +int chan; +{ +#ifdef DEBUG + fprintf (stderr, "Child Completion AST\n"); +#endif + if (child_st [0] == 0) + child_st [0] = 1; +} + +static struct exit_control_block + { + struct exit_control_block *flink; + int (*exit_routine) (); + int arg_count; + int *status_address; + int exit_status; + } exit_block = + { + 0, + exit_handler, + 1, + &exit_block.exit_status, + 0 + }; + +static char *pipe_and_fork (cmd) +char *cmd; +{ + $DESCRIPTOR (cmddsc, cmd); + static char mbxname [64]; + $DESCRIPTOR (mbxdsc, mbxname); + short iosb [4]; + int status; + int pid; + struct + { + short dna_buflen; + short dna_itmcod; + char *dna_buffer; + unsigned short *dna_retlen; + int listend; + } itmlst = + { + sizeof (mbxname), + DVI$_DEVNAM, + mbxname, + &mbxdsc.dsc$w_length, + 0 + }; + int mbxsize; + struct + { + short mbf_buflen; + short mbf_itmcod; + int *mbf_maxbuf; + unsigned short *mbf_retlen; + int listend; + } syiitmlst = + { + sizeof (mbxsize), + SYI$_MAXBUF, + &mbxsize, + 0, + 0 + }; + + cmddsc.dsc$w_length = strlen (cmd); + /* + * Get the SYSGEN parameter MAXBUF, and the smaller of it and 2048 as + * the size of the 'pipe' mailbox. 
+ */ + if (1 == (1& (vaxc$errno = sys$getsyiw (0, 0, 0, &syiitmlst, iosb, 0, 0, 0)))) + vaxc$errno = iosb [0]; + if (0 == (1&vaxc$errno)) + { + errno = EVMSERR; + perror ("Can't get SYSGEN parameter value for MAXBUF"); + exit (EXIT_ERR); + } + if (mbxsize > 2048) + mbxsize = 2048; + if (0 == (1& (vaxc$errno = sys$crembx (0, &child_chan, mbxsize, mbxsize, 0, 0, 0)))) + { + errno = EVMSERR; + perror ("Can't create pipe mailbox"); + exit (EXIT_ERR); + } + if (1 == (1& (vaxc$errno = sys$getdviw (0, child_chan, 0, &itmlst, iosb, + 0, 0, 0)))) + vaxc$errno = iosb [0]; + if (0 == (1&vaxc$errno)) + { + errno = EVMSERR; + perror ("Can't get pipe mailbox device name"); + exit (EXIT_ERR); + } + mbxname [mbxdsc.dsc$w_length] = '\0'; +#ifdef DEBUG + fprintf (stderr, "Pipe Mailbox Name = '%s'\n", mbxname); +#endif + if (0 == (1& (vaxc$errno = lib$spawn (&cmddsc, &mbxdsc, 0, &1, + 0, &pid, child_st, &0, sig_child, + &child_chan)))) + { + errno = EVMSERR; + perror ("Can't spawn subprocess"); + exit (EXIT_ERR); + } +#ifdef DEBUG + fprintf (stderr, "Subprocess's Pid = %08X\n", pid); +#endif + sys$dclexh (&exit_block); + return (mbxname); +} + +background_process (argc, argv) +int argc; +char **argv; +{ +char command [2048] = "$"; +$DESCRIPTOR (value, command); +$DESCRIPTOR (cmd, "BACKGROUND$COMMAND"); +$DESCRIPTOR (null, "NLA0:"); +int pid; + + strcat (command, argv [0]); + while (--argc) + { + strcat (command, " \""); + strcat (command, *(++argv)); + strcat (command, "\""); + } + value.dsc$w_length = strlen (command); + if (0 == (1& (vaxc$errno = lib$set_symbol (&cmd, &value)))) + { + errno = EVMSERR; + perror ("Can't create symbol for subprocess command"); + exit (EXIT_ERR); + } + if (0 == (1& (vaxc$errno = lib$spawn (&cmd, &null, 0, &17, 0, &pid)))) + { + errno = EVMSERR; + perror ("Can't spawn subprocess"); + exit (EXIT_ERR); + } +#ifdef DEBUG + fprintf (stderr, "%s\n", command); +#endif + fprintf (stderr, "%08X\n", pid); + return (EXIT_OK); +} + +/* vi:set tabstop=4 
shiftwidth=4: */ diff --git a/args.c b/args.c new file mode 100644 index 0000000..a9336d2 --- /dev/null +++ b/args.c @@ -0,0 +1,274 @@ +/* +* $Id: args.c 536 2007-06-02 06:09:00Z elliotth $ +* +* Copyright (c) 1999-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for reading command line arguments. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include +#include +#include + +#include "args.h" +#include "debug.h" +#include "routines.h" + +/* +* FUNCTION DEFINITIONS +*/ + +static char *nextStringArg (const char** const next) +{ + char* result = NULL; + const char* start; + + Assert (*next != NULL); + for (start = *next ; isspace ((int) *start) ; ++start) + ; + if (*start == '\0') + *next = start; + else + { + size_t length; + const char* end; + + for (end = start ; *end != '\0' && ! isspace ((int) *end) ; ++end) + ; + length = end - start; + Assert (length > 0); + result = xMalloc (length + 1, char); + strncpy (result, start, length); + result [length] = '\0'; + *next = end; + } + return result; +} + +static char* nextStringLine (const char** const next) +{ + char* result = NULL; + size_t length; + const char* end; + + Assert (*next != NULL); + for (end = *next ; *end != '\n' && *end != '\0' ; ++end) + ; + length = end - *next; + if (length > 0) + { + result = xMalloc (length + 1, char); + strncpy (result, *next, length); + result [length] = '\0'; + } + if (*end == '\n') + ++end; + else if (*end == '\r') + { + ++end; + if (*end == '\n') + ++end; + } + *next = end; + return result; +} + +static char* nextString (const Arguments* const current, const char** const next) +{ + char* result; + if (current->lineMode) + result = nextStringLine (next); + else + result = nextStringArg (next); + return result; +} + +static char* nextFileArg (FILE* const fp) +{ + char* result = NULL; + Assert (fp != NULL); + if (! 
feof (fp)) + { + vString* vs = vStringNew (); + int c; + do + c = fgetc (fp); + while (isspace (c)); + + if (c != EOF) + { + do + { + vStringPut (vs, c); + c = fgetc (fp); + } while (c != EOF && ! isspace (c)); + vStringTerminate (vs); + Assert (vStringLength (vs) > 0); + result = xMalloc (vStringLength (vs) + 1, char); + strcpy (result, vStringValue (vs)); + } + vStringDelete (vs); + } + return result; +} + +static char* nextFileLine (FILE* const fp) +{ + char* result = NULL; + if (! feof (fp)) + { + vString* vs = vStringNew (); + int c; + + Assert (fp != NULL); + c = fgetc (fp); + while (c != EOF) + { + if (c != '\n' && c != '\r') + vStringPut (vs, c); + else if (vStringLength (vs) > 0) + break; + c = fgetc (fp); + } + if (c != EOF || vStringLength (vs) > 0) + { + if (c == '\r') + { + c = fgetc (fp); + if (c != '\n') + c = ungetc (c, fp); + } + vStringTerminate (vs); + vStringStripTrailing (vs); + result = xMalloc (vStringLength (vs) + 1, char); + strcpy (result, vStringValue (vs)); + } + vStringDelete (vs); + } + return result; +} + +static char* nextFileString (const Arguments* const current, FILE* const fp) +{ + char* result; + if (current->lineMode) + result = nextFileLine (fp); + else + result = nextFileArg (fp); + return result; +} + +extern Arguments* argNewFromString (const char* const string) +{ + Arguments* result = xMalloc (1, Arguments); + memset (result, 0, sizeof (Arguments)); + result->type = ARG_STRING; + result->u.stringArgs.string = string; + result->u.stringArgs.item = string; + result->u.stringArgs.next = string; + result->item = nextString (result, &result->u.stringArgs.next); + return result; +} + +extern Arguments* argNewFromArgv (char* const* const argv) +{ + Arguments* result = xMalloc (1, Arguments); + memset (result, 0, sizeof (Arguments)); + result->type = ARG_ARGV; + result->u.argvArgs.argv = argv; + result->u.argvArgs.item = result->u.argvArgs.argv; + result->item = *result->u.argvArgs.item; + return result; +} + +extern Arguments* 
argNewFromFile (FILE* const fp) +{ + Arguments* result = xMalloc (1, Arguments); + memset (result, 0, sizeof (Arguments)); + result->type = ARG_FILE; + result->u.fileArgs.fp = fp; + result->item = nextFileString (result, result->u.fileArgs.fp); + return result; +} + +extern Arguments* argNewFromLineFile (FILE* const fp) +{ + Arguments* result = xMalloc (1, Arguments); + memset (result, 0, sizeof (Arguments)); + result->type = ARG_FILE; + result->lineMode = TRUE; + result->u.fileArgs.fp = fp; + result->item = nextFileString (result, result->u.fileArgs.fp); + return result; +} + +extern char *argItem (const Arguments* const current) +{ + Assert (current != NULL); + Assert (! argOff (current)); + return current->item; +} + +extern boolean argOff (const Arguments* const current) +{ + Assert (current != NULL); + return (boolean) (current->item == NULL); +} + +extern void argSetWordMode (Arguments* const current) +{ + Assert (current != NULL); + current->lineMode = FALSE; +} + +extern void argSetLineMode (Arguments* const current) +{ + Assert (current != NULL); + current->lineMode = TRUE; +} + +extern void argForth (Arguments* const current) +{ + Assert (current != NULL); + Assert (! 
argOff (current)); + switch (current->type) + { + case ARG_STRING: + if (current->item != NULL) + eFree (current->item); + current->u.stringArgs.item = current->u.stringArgs.next; + current->item = nextString (current, ¤t->u.stringArgs.next); + break; + case ARG_ARGV: + ++current->u.argvArgs.item; + current->item = *current->u.argvArgs.item; + break; + case ARG_FILE: + if (current->item != NULL) + eFree (current->item); + current->item = nextFileString (current, current->u.fileArgs.fp); + break; + default: + Assert ("Invalid argument type" == NULL); + break; + } +} + +extern void argDelete (Arguments* const current) +{ + Assert (current != NULL); + if (current->type == ARG_STRING && current->item != NULL) + eFree (current->item); + memset (current, 0, sizeof (Arguments)); + eFree (current); +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/args.h b/args.h new file mode 100644 index 0000000..985a06c --- /dev/null +++ b/args.h @@ -0,0 +1,63 @@ +/* +* $Id: args.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1999-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Defines external interface to command line argument reading. 
+*/ +#ifndef _ARGS_H +#define _ARGS_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +/* +* DATA DECLARATIONS +*/ + +typedef enum { ARG_NONE, ARG_STRING, ARG_ARGV, ARG_FILE } argType; + +typedef struct sArgs { + argType type; + union { + struct sStringArgs { + const char* string; + const char* next; + const char* item; + } stringArgs; + struct sArgvArgs { + char* const* argv; + char* const* item; + } argvArgs; + struct sFileArgs { + FILE* fp; + } fileArgs; + } u; + char* item; + boolean lineMode; +} Arguments; + +/* +* FUNCTION PROTOTYPES +*/ +extern Arguments* argNewFromString (const char* const string); +extern Arguments* argNewFromArgv (char* const* const argv); +extern Arguments* argNewFromFile (FILE* const fp); +extern Arguments* argNewFromLineFile (FILE* const fp); +extern char *argItem (const Arguments* const current); +extern boolean argOff (const Arguments* const current); +extern void argSetWordMode (Arguments* const current); +extern void argSetLineMode (Arguments* const current); +extern void argForth (Arguments* const current); +extern void argDelete (Arguments* const current); + +#endif /* _ARGS_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/asm.c b/asm.c new file mode 100644 index 0000000..8c1ff2b --- /dev/null +++ b/asm.c @@ -0,0 +1,387 @@ +/* +* $Id: asm.c 536 2007-06-02 06:09:00Z elliotth $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for assembly language +* files. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "debug.h" +#include "keyword.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* +* DATA DECLARATIONS +*/ +typedef enum { + K_NONE = -1, K_DEFINE, K_LABEL, K_MACRO, K_TYPE +} AsmKind; + +typedef enum { + OP_UNDEFINED = -1, + OP_ALIGN, + OP_COLON_EQUAL, + OP_END, + OP_ENDM, + OP_ENDMACRO, + OP_ENDP, + OP_ENDS, + OP_EQU, + OP_EQUAL, + OP_LABEL, + OP_MACRO, + OP_PROC, + OP_RECORD, + OP_SECTIONS, + OP_SET, + OP_STRUCT, + OP_LAST +} opKeyword; + +typedef struct { + const char *operator; + opKeyword keyword; +} asmKeyword; + +typedef struct { + opKeyword keyword; + AsmKind kind; +} opKind; + +/* +* DATA DEFINITIONS +*/ +static langType Lang_asm; + +static kindOption AsmKinds [] = { + { TRUE, 'd', "define", "defines" }, + { TRUE, 'l', "label", "labels" }, + { TRUE, 'm', "macro", "macros" }, + { TRUE, 't', "type", "types (structs and records)" } +}; + +static const asmKeyword AsmKeywords [] = { + { "align", OP_ALIGN }, + { "endmacro", OP_ENDMACRO }, + { "endm", OP_ENDM }, + { "end", OP_END }, + { "endp", OP_ENDP }, + { "ends", OP_ENDS }, + { "equ", OP_EQU }, + { "label", OP_LABEL }, + { "macro", OP_MACRO }, + { ":=", OP_COLON_EQUAL }, + { "=", OP_EQUAL }, + { "proc", OP_PROC }, + { "record", OP_RECORD }, + { "sections", OP_SECTIONS }, + { "set", OP_SET }, + { "struct", OP_STRUCT } +}; + +static const opKind OpKinds [] = { + /* must be ordered same as opKeyword enumeration */ + { OP_ALIGN, K_NONE }, + { OP_COLON_EQUAL, K_DEFINE }, + { OP_END, K_NONE }, + { OP_ENDM, K_NONE }, + { OP_ENDMACRO, K_NONE }, + { OP_ENDP, K_NONE }, + { OP_ENDS, K_NONE }, + { OP_EQU, K_DEFINE }, + { OP_EQUAL, K_DEFINE }, + { OP_LABEL, K_LABEL }, + { OP_MACRO, K_MACRO }, + { OP_PROC, K_LABEL }, + { OP_RECORD, K_TYPE }, + { OP_SECTIONS, K_NONE }, + { OP_SET, K_DEFINE }, + { OP_STRUCT, K_TYPE } +}; + +/* +* FUNCTION DEFINITIONS +*/ +static void 
buildAsmKeywordHash (void) +{ + const size_t count = sizeof (AsmKeywords) / sizeof (AsmKeywords [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const asmKeyword* const p = AsmKeywords + i; + addKeyword (p->operator, Lang_asm, (int) p->keyword); + } +} + +static opKeyword analyzeOperator (const vString *const op) +{ + vString *keyword = vStringNew (); + opKeyword result; + + vStringCopyToLower (keyword, op); + result = (opKeyword) lookupKeyword (vStringValue (keyword), Lang_asm); + vStringDelete (keyword); + return result; +} + +static boolean isInitialSymbolCharacter (int c) +{ + return (boolean) (c != '\0' && (isalpha (c) || strchr ("_$", c) != NULL)); +} + +static boolean isSymbolCharacter (int c) +{ + /* '?' character is allowed in AMD 29K family */ + return (boolean) (c != '\0' && (isalnum (c) || strchr ("_$?", c) != NULL)); +} + +static boolean readPreProc (const unsigned char *const line) +{ + boolean result; + const unsigned char *cp = line; + vString *name = vStringNew (); + while (isSymbolCharacter ((int) *cp)) + { + vStringPut (name, *cp); + ++cp; + } + vStringTerminate (name); + result = (boolean) (strcmp (vStringValue (name), "define") == 0); + if (result) + { + while (isspace ((int) *cp)) + ++cp; + vStringClear (name); + while (isSymbolCharacter ((int) *cp)) + { + vStringPut (name, *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AsmKinds, K_DEFINE); + } + vStringDelete (name); + return result; +} + +static AsmKind operatorKind ( + const vString *const operator, + boolean *const found) +{ + AsmKind result = K_NONE; + const opKeyword kw = analyzeOperator (operator); + *found = (boolean) (kw != OP_UNDEFINED); + if (*found) + { + result = OpKinds [kw].kind; + Assert (OpKinds [kw].keyword == kw); + } + return result; +} + +/* We must check for "DB", "DB.L", "DCB.W" (68000) + */ +static boolean isDefineOperator (const vString *const operator) +{ + const unsigned char *const op = + (unsigned char*) vStringValue (operator); + const 
size_t length = vStringLength (operator); + const boolean result = (boolean) (length > 0 && + toupper ((int) *op) == 'D' && + (length == 2 || + (length == 4 && (int) op [2] == '.') || + (length == 5 && (int) op [3] == '.'))); + return result; +} + +static void makeAsmTag ( + const vString *const name, + const vString *const operator, + const boolean labelCandidate, + const boolean nameFollows) +{ + if (vStringLength (name) > 0) + { + boolean found; + const AsmKind kind = operatorKind (operator, &found); + if (found) + { + if (kind != K_NONE) + makeSimpleTag (name, AsmKinds, kind); + } + else if (isDefineOperator (operator)) + { + if (! nameFollows) + makeSimpleTag (name, AsmKinds, K_DEFINE); + } + else if (labelCandidate) + { + operatorKind (name, &found); + if (! found) + makeSimpleTag (name, AsmKinds, K_LABEL); + } + } +} + +static const unsigned char *readSymbol ( + const unsigned char *const start, + vString *const sym) +{ + const unsigned char *cp = start; + vStringClear (sym); + if (isInitialSymbolCharacter ((int) *cp)) + { + while (isSymbolCharacter ((int) *cp)) + { + vStringPut (sym, *cp); + ++cp; + } + vStringTerminate (sym); + } + return cp; +} + +static const unsigned char *readOperator ( + const unsigned char *const start, + vString *const operator) +{ + const unsigned char *cp = start; + vStringClear (operator); + while (*cp != '\0' && ! isspace ((int) *cp)) + { + vStringPut (operator, *cp); + ++cp; + } + vStringTerminate (operator); + return cp; +} + +static void findAsmTags (void) +{ + vString *name = vStringNew (); + vString *operator = vStringNew (); + const unsigned char *line; + boolean inCComment = FALSE; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + boolean labelCandidate = (boolean) (! 
isspace ((int) *cp)); + boolean nameFollows = FALSE; + const boolean isComment = (boolean) + (*cp != '\0' && strchr (";*@", *cp) != NULL); + + /* skip comments */ + if (strncmp ((const char*) cp, "/*", (size_t) 2) == 0) + { + inCComment = TRUE; + cp += 2; + } + if (inCComment) + { + do + { + if (strncmp ((const char*) cp, "*/", (size_t) 2) == 0) + { + inCComment = FALSE; + cp += 2; + break; + } + ++cp; + } while (*cp != '\0'); + } + if (isComment || inCComment) + continue; + + /* read preprocessor defines */ + if (*cp == '#') + { + ++cp; + readPreProc (cp); + continue; + } + + /* skip white space */ + while (isspace ((int) *cp)) + ++cp; + + /* read symbol */ + cp = readSymbol (cp, name); + if (vStringLength (name) > 0 && *cp == ':') + { + labelCandidate = TRUE; + ++cp; + } + + if (! isspace ((int) *cp) && *cp != '\0') + continue; + + /* skip white space */ + while (isspace ((int) *cp)) + ++cp; + + /* skip leading dot */ +#if 0 + if (*cp == '.') + ++cp; +#endif + + cp = readOperator (cp, operator); + + /* attempt second read of symbol */ + if (vStringLength (name) == 0) + { + while (isspace ((int) *cp)) + ++cp; + cp = readSymbol (cp, name); + nameFollows = TRUE; + } + makeAsmTag (name, operator, labelCandidate, nameFollows); + } + vStringDelete (name); + vStringDelete (operator); +} + +static void initialize (const langType language) +{ + Lang_asm = language; + buildAsmKeywordHash (); +} + +extern parserDefinition* AsmParser (void) +{ + static const char *const extensions [] = { + "asm", "ASM", "s", "S", NULL + }; + static const char *const patterns [] = { + "*.A51", + "*.29[kK]", + "*.[68][68][kKsSxX]", + "*.[xX][68][68]", + NULL + }; + parserDefinition* def = parserNew ("Asm"); + def->kinds = AsmKinds; + def->kindCount = KIND_COUNT (AsmKinds); + def->extensions = extensions; + def->patterns = patterns; + def->parser = findAsmTags; + def->initialize = initialize; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/asp.c b/asp.c new file mode 
100644 index 0000000..7290ad8 --- /dev/null +++ b/asp.c @@ -0,0 +1,328 @@ +/* +* $Id: asp.c 711 2009-07-04 16:52:11Z dhiebert $ +* +* Copyright (c) 2000, Patrick Dehne +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for the ASP (Active +* Server Pages) web page scripting language. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "parse.h" +#include "read.h" +#include "vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_CONST, K_CLASS, K_FUNCTION, K_SUB, K_DIM +} aspKind; + +static kindOption AspKinds [] = { + { TRUE, 'd', "constant", "constants"}, + { TRUE, 'c', "class", "classes"}, + { TRUE, 'f', "function", "functions"}, + { TRUE, 's', "subroutine", "subroutines"}, + { TRUE, 'v', "variable", "variables"} +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static void findAspTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + + while (*cp != '\0') + { + /* jump over whitespace */ + while (isspace ((int)*cp)) + cp++; + + /* jump over strings */ + if (*cp == '"') + { + cp++; + while (*cp!='"' && *cp!='\0') + cp++; + } + + /* jump over comments */ + else if (*cp == '\'') + break; + + /* jump over end function/sub lines */ + else if (strncasecmp ((const char*) cp, "end", (size_t) 3)== 0) + { + cp += 3; + if (isspace ((int)*cp)) + { + while (isspace ((int)*cp)) + ++cp; + + if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0) + { + cp+=8; + break; + } + + else if (strncasecmp ((const char*) cp, "sub", (size_t) 3) == 0) + { + cp+=3; + break; + } + } + } + + /* jump over exit function/sub lines */ + else if (strncasecmp ((const char*) cp, "exit", (size_t) 4)==0) + { + cp += 4; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + + if (strncasecmp ((const char*) 
cp, "function", (size_t) 8) == 0) + { + cp+=8; + break; + } + + else if (strncasecmp ((const char*) cp, "sub", (size_t) 3) == 0) + { + cp+=3; + break; + } + } + } + + /* class member? */ + else if (strncasecmp ((const char*) cp, "public", (size_t) 6) == 0) + { + cp += 6; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0) + { + cp+=8; + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_FUNCTION); + vStringClear (name); + } + else if (strncasecmp ((const char*) cp, "sub", (size_t) 3) == 0) + { + cp+=3; + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_SUB); + vStringClear (name); + } + else { + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_DIM); + vStringClear (name); + } + } + } + else if (strncasecmp ((const char*) cp, "private", (size_t) 7) == 0) + { + cp += 7; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0) + { + cp+=8; + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_FUNCTION); + vStringClear (name); + } + else if (strncasecmp ((const char*) cp, "sub", (size_t) 3) == 0) + { + cp+=3; + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_SUB); + vStringClear (name); + } + else { + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut 
(name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_DIM); + vStringClear (name); + } + } + } + + /* function? */ + else if (strncasecmp ((const char*) cp, "function", (size_t) 8) == 0) + { + cp += 8; + + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_FUNCTION); + vStringClear (name); + } + } + + /* sub? */ + else if (strncasecmp ((const char*) cp, "sub", (size_t) 3) == 0) + { + cp += 3; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_SUB); + vStringClear (name); + } + } + + /* dim variable? */ + else if (strncasecmp ((const char*) cp, "dim", (size_t) 3) == 0) + { + cp += 3; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_DIM); + vStringClear (name); + } + } + + /* class declaration? */ + else if (strncasecmp ((const char*) cp, "class", (size_t) 5) == 0) + { + cp += 5; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_CLASS); + vStringClear (name); + } + } + + /* const declaration? 
*/ + else if (strncasecmp ((const char*) cp, "const", (size_t) 5) == 0) + { + cp += 5; + if (isspace ((int) *cp)) + { + while (isspace ((int) *cp)) + ++cp; + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, AspKinds, K_CONST); + vStringClear (name); + } + } + + /* nothing relevant */ + else if (*cp != '\0') + cp++; + } + } + vStringDelete (name); +} + +extern parserDefinition* AspParser (void) +{ + static const char *const extensions [] = { "asp", "asa", NULL }; + parserDefinition* def = parserNew ("Asp"); + def->kinds = AspKinds; + def->kindCount = KIND_COUNT (AspKinds); + def->extensions = extensions; + def->parser = findAspTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ + diff --git a/awk.c b/awk.c new file mode 100644 index 0000000..d825d6f --- /dev/null +++ b/awk.c @@ -0,0 +1,81 @@ +/* +* $Id: awk.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2000-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for AWK functions. 
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+#include <ctype.h>
+#include <string.h>
+
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+*   DATA DEFINITIONS
+*/
+typedef enum eAwkKinds {
+	K_FUNCTION
+} awkKind;
+
+static kindOption AwkKinds [] = {
+	{ TRUE, 'f', "function", "functions" }
+};
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/*  Scans the input line by line for AWK function definitions. A line
+ *  qualifies when it begins (in column 0) with the word "function" followed
+ *  by white space; the identifier that follows is tagged as K_FUNCTION, but
+ *  only if the next non-blank character after it is '('.
+ */
+static void findAwkTags (void)
+{
+	static const char keyword [] = "function";
+	const size_t keywordLength = sizeof (keyword) - 1;
+	vString *name = vStringNew ();
+	const unsigned char *line;
+
+	while ((line = fileReadLine ()) != NULL)
+	{
+		const unsigned char *cp;
+
+		/* A successful strncmp() proves the first keywordLength bytes are
+		 * not NUL, so line [keywordLength] is at worst the terminator. */
+		if (strncmp ((const char*) line, keyword, keywordLength) != 0  ||
+			! isspace ((int) line [keywordLength]))
+			continue;
+
+		cp = line + keywordLength;
+		while (isspace ((int) *cp))
+			++cp;
+		while (isalnum ((int) *cp)  ||  *cp == '_')
+		{
+			vStringPut (name, (int) *cp);
+			++cp;
+		}
+		vStringTerminate (name);
+		while (isspace ((int) *cp))
+			++cp;
+		/* Only a name followed by a parameter list is a definition. */
+		if (*cp == '(')
+			makeSimpleTag (name, AwkKinds, K_FUNCTION);
+		vStringClear (name);
+	}
+	vStringDelete (name);
+}
+
+/*  Creates the parser definition for AWK: one kind table (AwkKinds), the
+ *  file extensions "awk", "gawk" and "mawk", and findAwkTags() as the
+ *  line scanner.
+ */
+extern parserDefinition* AwkParser (void)
+{
+	static const char *const extensions [] = { "awk", "gawk", "mawk", NULL };
+	parserDefinition* def = parserNew ("Awk");
+	def->kinds      = AwkKinds;
+	def->kindCount  = KIND_COUNT (AwkKinds);
+	def->extensions = extensions;
+	def->parser     = findAwkTags;
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/basic.c b/basic.c
new file mode 100644
index 0000000..a117afa
--- /dev/null
+++ b/basic.c
@@ -0,0 +1,203 @@
+/*
+ *   $Id:$
+ *
+ *   Copyright (c) 2000-2006, Darren Hiebert, Elias Pschernig
+ *
+ *   This source code is released for free distribution under the terms of the
+ *   GNU General Public License.
+ *
+ *   This module contains functions for generating tags for BlitzBasic
+ *   (BlitzMax), PureBasic and FreeBasic language files. For now, this is kept
+ *   quite simple - but feel free to ask for more things added any time -
+ *   patches are of course most welcome.
+ */
+
+/*
+ *   INCLUDE FILES
+ */
+#include "general.h" /* must always come first */
+
+#include <ctype.h>
+#include <string.h>
+
+#include "options.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+ *   DATA DEFINITIONS
+ */
+typedef enum {
+	K_CONST,
+	K_FUNCTION,
+	K_LABEL,
+	K_TYPE,
+	K_VARIABLE,
+	K_ENUM
+} BasicKind;
+
+/* One keyword of a Basic dialect: the lower-case text to match, the kind of
+ * tag it introduces, and how many words to skip between the keyword and the
+ * name that gets tagged. */
+typedef struct {
+	char const *token;
+	BasicKind kind;
+	int skip;
+} KeyWord;
+
+static kindOption BasicKinds[] = {
+	{TRUE, 'c', "constant", "constants"},
+	{TRUE, 'f', "function", "functions"},
+	{TRUE, 'l', "label", "labels"},
+	{TRUE, 't', "type", "types"},
+	{TRUE, 'v', "variable", "variables"},
+	{TRUE, 'g', "enum", "enumerations"}
+};
+
+static KeyWord blitzbasic_keywords[] = {
+	{"const", K_CONST, 0},
+	{"global", K_VARIABLE, 0},
+	{"dim", K_VARIABLE, 0},
+	{"function", K_FUNCTION, 0},
+	{"type", K_TYPE, 0},
+	{NULL, 0, 0}
+};
+
+static KeyWord purebasic_keywords[] = {
+	{"newlist", K_VARIABLE, 0},
+	{"global", K_VARIABLE, 0},
+	{"dim", K_VARIABLE, 0},
+	{"procedure", K_FUNCTION, 0},
+	{"interface", K_TYPE, 0},
+	{"structure", K_TYPE, 0},
+	{NULL, 0, 0}
+};
+
+static KeyWord freebasic_keywords[] = {
+	{"const", K_CONST, 0},
+	{"dim as", K_VARIABLE, 1},
+	{"dim", K_VARIABLE, 0},
+	{"common", K_VARIABLE, 0},
+	{"function", K_FUNCTION, 0},
+	{"sub", K_FUNCTION, 0},
+	{"private sub", K_FUNCTION, 0},
+	{"public sub", K_FUNCTION, 0},
+	{"private function", K_FUNCTION, 0},
+	{"public function", K_FUNCTION, 0},
+	{"type", K_TYPE, 0},
+	{"enum", K_ENUM, 0},
+	{NULL, 0, 0}
+};
+
+/*
+ *   FUNCTION DEFINITIONS
+ */
+
+/* Match the name of a tag (function, variable, type, ...) starting at pos.
+ * Leading white space is skipped, then characters are copied into name
+ * (which is cleared first) up to the next white space, '(' or ',' or the end
+ * of the string. Returns the position where copying stopped.
+ * Characters are passed to isspace() as unsigned char, because a negative
+ * plain char (any byte above 0x7f where char is signed) is not a valid
+ * argument to the <ctype.h> functions. */
+static char const *extract_name (char const *pos, vString * name)
+{
+	while (isspace ((unsigned char) *pos))
+		pos++;
+	vStringClear (name);
+	for (; *pos && !isspace ((unsigned char) *pos) && *pos != '(' && *pos != ','; pos++)
+		vStringPut (name, *pos);
+	vStringTerminate (name);
+	return pos;
+}
+
+/* Match a keyword starting at p (case insensitive).
*/
+/* Returns 1 if p begins with kw->token, after emitting a tag of kind
+ * kw->kind for the (1 + kw->skip)-th word that follows the keyword;
+ * returns 0 (and emits nothing) otherwise. A keyword that is immediately
+ * followed by a letter, digit or underscore is not a keyword at all but the
+ * start of a longer identifier ("dimension", "types"), so it is rejected. */
+static int match_keyword (const char *p, KeyWord const *kw)
+{
+	vString *name;
+	const size_t length = strlen (kw->token);
+	size_t i;
+	int j;
+
+	/* Only the input is folded to lower case and compared with the token as
+	 * written. A NUL in the input never equals a token character, so the
+	 * scan cannot run past the end of the line. */
+	for (i = 0; i < length; i++)
+	{
+		if (tolower ((unsigned char) p[i]) != kw->token[i])
+			return 0;
+	}
+	p += length;
+	if (isalnum ((unsigned char) *p) || *p == '_')
+		return 0;
+	name = vStringNew ();
+	for (j = 0; j < 1 + kw->skip; j++)
+	{
+		p = extract_name (p, name);
+	}
+	makeSimpleTag (name, BasicKinds, kw->kind);
+	vStringDelete (name);
+	return 1;
+}
+
+/* Match a "label:" style label: if the last non-blank character of p is a
+ * colon, everything before that colon is tagged as K_LABEL. */
+static void match_colon_label (char const *p)
+{
+	char const *end;
+
+	/* Nothing to inspect; also keeps end from starting before p. */
+	if (*p == '\0')
+		return;
+	end = p + strlen (p) - 1;
+	/* Never step in front of the first character. */
+	while (end > p && isspace ((unsigned char) *end))
+		end--;
+	if (*end == ':')
+	{
+		vString *name = vStringNew ();
+		vStringNCatS (name, p, end - p);
+		makeSimpleTag (name, BasicKinds, K_LABEL);
+		vStringDelete (name);
+	}
+}
+
+/* Match a ".label" style label: if p starts with a dot, the name that
+ * follows the dot is tagged as K_LABEL. */
+static void match_dot_label (char const *p)
+{
+	if (*p == '.')
+	{
+		vString *name = vStringNew ();
+		extract_name (p + 1, name);
+		makeSimpleTag (name, BasicKinds, K_LABEL);
+		vStringDelete (name);
+	}
+}
+
+/* Scans the input line by line. The keyword table is chosen from the file
+ * extension ("bb": BlitzBasic, "pb": PureBasic, anything else: FreeBasic).
+ * Each non-empty line is tried against the keywords in table order, first
+ * match wins, and is then also checked for a label: ".label" form for "bb"
+ * files, "label:" form for all others. */
+static void findBasicTags (void)
+{
+	const char *line;
+	const char *extension = fileExtension (vStringValue (File.name));
+	/* The dialect is fixed for the whole file, so decide it once. */
+	const int isBlitzBasic = strcmp (extension, "bb") == 0;
+	KeyWord *keywords;
+
+	if (isBlitzBasic)
+		keywords = blitzbasic_keywords;
+	else if (strcmp (extension, "pb") == 0)
+		keywords = purebasic_keywords;
+	else
+		keywords = freebasic_keywords;
+
+	while ((line = (const char *) fileReadLine ()) != NULL)
+	{
+		const char *p = line;
+		KeyWord const *kw;
+
+		while (isspace ((unsigned char) *p))
+			p++;
+
+		/* Empty line? */
+		if (!*p)
+			continue;
+
+		/* In Basic, keywords always are at the start of the line. */
+		for (kw = keywords; kw->token; kw++)
+			if (match_keyword (p, kw)) break;
+
+		/* Is it a label? */
+		if (isBlitzBasic)
+			match_dot_label (p);
+		else
+			match_colon_label (p);
+	}
+}
+
+/* Creates the parser definition for the Basic dialects: kind table
+ * BasicKinds, file extensions "bas", "bi", "bb" and "pb", and
+ * findBasicTags() as the line scanner. */
+parserDefinition *BasicParser (void)
+{
+	static char const *extensions[] = { "bas", "bi", "bb", "pb", NULL };
+	parserDefinition *def = parserNew ("Basic");
+	def->kinds = BasicKinds;
+	def->kindCount = KIND_COUNT (BasicKinds);
+	def->extensions = extensions;
+	def->parser = findBasicTags;
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/beta.c b/beta.c
new file mode 100644
index 0000000..da195a1
--- /dev/null
+++ b/beta.c
@@ -0,0 +1,321 @@
+/*
+*   $Id: beta.c 536 2007-06-02 06:09:00Z elliotth $
+*
+*   Copyright (c) 1999-2000, Mjølner Informatics
+*
+*   Written by Erik Corry
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions for generating tags for BETA language
+*   files.
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+#include <string.h>
+
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+*   MACROS
+*/
+#define isbident(c) (identarray [(unsigned char) (c)])
+
+/*
+*   DATA DEFINITIONS
+*/
+typedef enum {
+	K_FRAGMENT, K_PATTERN, K_SLOT, K_VIRTUAL
+} betaKind;
+
+static kindOption BetaKinds [] = {
+	{ TRUE,  'f', "fragment", "fragment definitions"},
+	{ FALSE, 'p', "pattern",  "all patterns"},
+	{ TRUE,  's', "slot",     "slots (fragment uses)"},
+	{ TRUE,  'v', "virtual",  "patterns (virtual or rebound)"}
+};
+
+/* [A-Z_a-z0-9] */
+static const char identarray [256] = {
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-15 */
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 32-47 !"#$%&'()*+'-./ */
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 48-63 0123456789:;<=>? 
*/
+0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 64-79 @ABCDEFGHIJKLMNO */
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 80-95 PQRSTUVWXYZ [\]^_ */
+0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 96-111 `abcdefghijklmno */
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 112-127 pqrstuvwxyz{|}~ */
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128- */
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* -255 */
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/*  Emits a tag called name with the kind name and letter taken from
+ *  BetaKinds [kind], provided that kind is enabled.
+ */
+static void makeBetaTag (const char* const name, const betaKind kind)
+{
+	if (BetaKinds [kind].enabled)
+	{
+		tagEntryInfo e;
+		initTagEntry (&e, name);
+		e.kindName = BetaKinds [kind].name;
+		e.kind     = BetaKinds [kind].letter;
+		makeTagEntry (&e);
+	}
+}
+
+/*  Reads the input one line at a time (a line ends at '\n', '\r' or EOF) and
+ *  emits three sorts of tags:
+ *    - fragment titles: lines that start and end with "--";
+ *    - slots: names inside "<<" ... ">>";
+ *    - patterns: identifiers in front of a ':' (all of them when the pattern
+ *      kind is enabled, otherwise only those followed by ':' or '<' when the
+ *      virtual kind is enabled).
+ *  Comment state ("{ }" and "(* *)") is carried from line to line; quote
+ *  state is reset at the end of every line.
+ *  All characters handed to isspace() are converted to unsigned char first,
+ *  since a negative plain char is not a valid <ctype.h> argument.
+ */
+static void findBetaTags (void)
+{
+	vString *line = vStringNew ();
+	boolean incomment = FALSE;
+	boolean inquote = FALSE;
+	boolean dovirtuals = BetaKinds [K_VIRTUAL].enabled;
+	boolean dopatterns = BetaKinds [K_PATTERN].enabled;
+
+	do
+	{
+		boolean foundfragmenthere = FALSE;
+		/* find fragment definition (line that starts and ends with --) */
+		int last;
+		int first;
+		int c;
+
+		vStringClear (line);
+
+		while ((c = fileGetc ()) != EOF && c != '\n' && c != '\r')
+			vStringPut (line, c);
+
+		vStringTerminate (line);
+
+		last = vStringLength (line) - 1;
+		first = 0;
+		/* skip white space at start and end of line; an empty line gives
+		 * last == -1, which must never be used as an index */
+		while (last > 0 && isspace ((unsigned char) vStringChar (line, last))) last--;
+		while (first < last && isspace ((unsigned char) vStringChar (line, first))) first++;
+		/* if line still has a reasonable length and ... */
+		if (last - first > 4 &&
+			(vStringChar (line, first)     == '-' &&
+			 vStringChar (line, first + 1) == '-' &&
+			 vStringChar (line, last)      == '-' &&
+			 vStringChar (line, last - 1)  == '-'))
+		{
+			if (!incomment && !inquote)
+			{
+				foundfragmenthere = TRUE;
+				/* skip past -- and whitespace.  Also skip back past 'dopart'
+				   or 'attributes' to the :.  We have to do this because there
+				   is no sensible way to include whitespace in a ctags token
+				   so the conventional space after the ':' would mess us up */
+				last -= 2;
+				first += 2;
+				while (last && vStringChar (line, last) != ':') last--;
+				while (last && (isspace ((unsigned char) vStringChar (line, last-1)))) last--;
+				while (first < last &&
+					   (isspace ((unsigned char) vStringChar (line, first)) ||
+						vStringChar (line, first) == '-'))
+					first++;
+				/* If there's anything left it is a fragment title */
+				if (first < last - 1)
+				{
+					vStringChar (line, last) = 0;
+					if (strcasecmp ("LIB", vStringValue (line) + first) &&
+						strcasecmp ("PROGRAM", vStringValue (line) + first))
+					{
+						makeBetaTag (vStringValue (line) + first, K_FRAGMENT);
+					}
+				}
+			}
+		} else {
+			int pos = 0;
+			int len = vStringLength (line);
+			/* resume in whatever state the previous line ended in */
+			if (inquote) goto stringtext;
+			if (incomment) goto commenttext;
+		programtext:
+			for ( ; pos < len; pos++)
+			{
+				if (vStringChar (line, pos) == '\'')
+				{
+					pos++;
+					inquote = TRUE;
+					goto stringtext;
+				}
+				if (vStringChar (line, pos) == '{')
+				{
+					pos++;
+					incomment = TRUE;
+					goto commenttext;
+				}
+				if (vStringChar (line, pos) == '(' && pos < len - 1 &&
+					vStringChar (line, pos+1) == '*')
+				{
+					pos +=2;
+					incomment = TRUE;
+					goto commenttext;
+				}
+				/*
+				 * SLOT definition looks like this:
+				 * <<SLOT nameofslot: dopart>>
+				 * or
+				 * <<nameofslot: dopart>>
+				 */
+				if (!foundfragmenthere &&
+					vStringChar (line, pos) == '<' &&
+					pos+1 < len &&
+					vStringChar (line, pos+1) == '<' &&
+					strstr (vStringValue (line) + pos, ">>"))
+				{
+					/* Found slot name, get start and end */
+					int eoname;
+					char c2;
+					pos += 2; /* skip past << */
+					/* skip past space before SLOT */
+					while (pos < len && isspace ((unsigned char) vStringChar (line, pos)))
+						pos++;
+					/* skip past SLOT */
+					if (pos+4 <= len &&
+						!strncasecmp (vStringValue(line) + pos, "SLOT", (size_t)4))
+						pos += 4;
+					/* skip past space after SLOT */
+					while (pos < len && isspace ((unsigned char) vStringChar (line, pos)))
+						pos++;
+					eoname = pos;
+					/* skip to end of name */
+					while (eoname < len &&
+						   (c2 = vStringChar (line, eoname)) != '>' &&
+						   c2 != ':' &&
+						   !isspace ((unsigned char) c2))
+						eoname++;
+					if (eoname < len)
+					{
+						vStringChar (line, eoname) = 0;
+						if (strcasecmp ("LIB", vStringValue (line) + pos) &&
+							strcasecmp ("PROGRAM", vStringValue (line) + pos) &&
+							strcasecmp ("SLOT", vStringValue (line) + pos))
+						{
+							makeBetaTag (vStringValue (line) + pos, K_SLOT);
+						}
+					}
+					if (eoname+1 < len) {
+						pos = eoname + 1;
+					} else {
+						pos = len;
+						continue;
+					}
+				}
+				/* Only patterns that are virtual, extensions of virtuals or
+				 * final bindings are normally included so as not to overload
+				 * totally.
+				 * That means one of the forms name:: name:< or name::<
+				 */
+				if (!foundfragmenthere &&
+					vStringChar (line, pos) == ':' &&
+					(dopatterns ||
+					 (dovirtuals &&
+					  (vStringChar (line, pos+1) == ':' ||
+					   vStringChar (line, pos+1) == '<')
+					 )
+					)
+				   )
+				{
+					/* Found pattern name, get start and end */
+					int eoname = pos;
+					int soname;
+					while (eoname && isspace ((unsigned char) vStringChar (line, eoname-1)))
+						eoname--;
+				foundanothername:
+					/* terminate right after name */
+					vStringChar (line, eoname) = 0;
+					soname = eoname;
+					while (soname &&
+						isbident (vStringChar (line, soname-1)))
+					{
+						soname--;
+					}
+					if (soname != eoname)
+					{
+						makeBetaTag (vStringValue (line) + soname, K_PATTERN);
+						/* scan back past white space */
+						while (soname &&
+								isspace ((unsigned char) vStringChar (line, soname-1)))
+							soname--;
+						if (soname && vStringChar (line, soname-1) == ',')
+						{
+							/* we found a new pattern name before comma */
+							eoname = soname;
+							goto foundanothername;
+						}
+					}
+				}
+			}
+			goto endofline;
+		commenttext:
+			for ( ; pos < len; pos++)
+			{
+				if (vStringChar (line, pos) == '*' && pos < len - 1 &&
+					vStringChar (line, pos+1) == ')')
+				{
+					pos += 2;
+					incomment = FALSE;
+					goto programtext;
+				}
+				if (vStringChar (line, pos) == '}')
+				{
+					pos++;
+					incomment = FALSE;
+					goto programtext;
+				}
+			}
+			goto endofline;
+		stringtext:
+			for ( ; pos < len; pos++)
+			{
+				if (vStringChar (line, pos) == '\\')
+				{
+					if (pos < len - 1) pos++;
+				}
+				else if (vStringChar (line, pos) == '\'')
+				{
+					pos++;
+					/* support obsolete '' syntax */
+					if (pos < len && vStringChar (line, pos) == '\'')
+					{
+						continue;
+					}
+					inquote = FALSE;
+					goto programtext;
+				}
+			}
+		}
+		endofline:
+		inquote = FALSE;  /* This shouldn't really make a difference */
+	} while (!feof (File.fp));
+	vStringDelete (line);
+}
+
+/*  Creates the parser definition for BETA: kind table BetaKinds, file
+ *  extension "bet", and findBetaTags() as the scanner.
+ */
+extern parserDefinition* BetaParser (void)
+{
+	static const char *const extensions [] = { "bet", NULL };
+	parserDefinition* def = parserNew ("BETA");
+	def->kinds      = BetaKinds;
+	def->kindCount  = KIND_COUNT (BetaKinds);
+	def->extensions = extensions;
+	def->parser     = findBetaTags;
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/c.c b/c.c
new file mode 100644
index 0000000..0cf0a14
--- /dev/null
+++ b/c.c
@@ -0,0 +1,2932 @@
+/*
+*   $Id: c.c 689 2008-12-13 21:17:36Z elliotth $
+*
+*   Copyright (c) 1996-2003, Darren Hiebert
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions for parsing and scanning C, C++ and Java
+*   source files.
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"        /* must always come first */
+
+#include <string.h>
+#include <setjmp.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "get.h"
+#include "keyword.h"
+#include "options.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+
+/*
+*   MACROS
+*/
+
+#define activeToken(st)     ((st)->token [(int) (st)->tokenIndex])
+#define parentDecl(st)      ((st)->parent == NULL ? 
\ + DECL_NONE : (st)->parent->declaration) +#define isType(token,t) (boolean) ((token)->type == (t)) +#define insideEnumBody(st) ((st)->parent == NULL ? FALSE : \ + (boolean) ((st)->parent->declaration == DECL_ENUM)) +#define isExternCDecl(st,c) (boolean) ((c) == STRING_SYMBOL && \ + ! (st)->haveQualifyingName && (st)->scope == SCOPE_EXTERN) + +#define isOneOf(c,s) (boolean) (strchr ((s), (c)) != NULL) + +#define isHighChar(c) ((c) != EOF && (unsigned char)(c) >= 0xc0) + +/* +* DATA DECLARATIONS +*/ + +enum { NumTokens = 3 }; + +typedef enum eException { + ExceptionNone, ExceptionEOF, ExceptionFormattingError, + ExceptionBraceFormattingError +} exception_t; + +/* Used to specify type of keyword. + */ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_ATTRIBUTE, KEYWORD_ABSTRACT, + KEYWORD_BOOLEAN, KEYWORD_BYTE, KEYWORD_BAD_STATE, KEYWORD_BAD_TRANS, + KEYWORD_BIND, KEYWORD_BIND_VAR, KEYWORD_BIT, + KEYWORD_CASE, KEYWORD_CATCH, KEYWORD_CHAR, KEYWORD_CLASS, KEYWORD_CONST, + KEYWORD_CONSTRAINT, KEYWORD_COVERAGE_BLOCK, KEYWORD_COVERAGE_DEF, + KEYWORD_DEFAULT, KEYWORD_DELEGATE, KEYWORD_DELETE, KEYWORD_DO, + KEYWORD_DOUBLE, + KEYWORD_ELSE, KEYWORD_ENUM, KEYWORD_EXPLICIT, KEYWORD_EXTERN, + KEYWORD_EXTENDS, KEYWORD_EVENT, + KEYWORD_FINAL, KEYWORD_FLOAT, KEYWORD_FOR, KEYWORD_FOREACH, + KEYWORD_FRIEND, KEYWORD_FUNCTION, + KEYWORD_GOTO, + KEYWORD_IF, KEYWORD_IMPLEMENTS, KEYWORD_IMPORT, KEYWORD_INLINE, KEYWORD_INT, + KEYWORD_INOUT, KEYWORD_INPUT, KEYWORD_INTEGER, KEYWORD_INTERFACE, + KEYWORD_INTERNAL, + KEYWORD_LOCAL, KEYWORD_LONG, + KEYWORD_M_BAD_STATE, KEYWORD_M_BAD_TRANS, KEYWORD_M_STATE, KEYWORD_M_TRANS, + KEYWORD_MUTABLE, + KEYWORD_NAMESPACE, KEYWORD_NEW, KEYWORD_NEWCOV, KEYWORD_NATIVE, + KEYWORD_OPERATOR, KEYWORD_OUTPUT, KEYWORD_OVERLOAD, KEYWORD_OVERRIDE, + KEYWORD_PACKED, KEYWORD_PORT, KEYWORD_PACKAGE, KEYWORD_PRIVATE, + KEYWORD_PROGRAM, KEYWORD_PROTECTED, KEYWORD_PUBLIC, + KEYWORD_REGISTER, KEYWORD_RETURN, + KEYWORD_SHADOW, KEYWORD_STATE, + KEYWORD_SHORT, 
KEYWORD_SIGNED, KEYWORD_STATIC, KEYWORD_STRING, + KEYWORD_STRUCT, KEYWORD_SWITCH, KEYWORD_SYNCHRONIZED, + KEYWORD_TASK, KEYWORD_TEMPLATE, KEYWORD_THIS, KEYWORD_THROW, + KEYWORD_THROWS, KEYWORD_TRANSIENT, KEYWORD_TRANS, KEYWORD_TRANSITION, + KEYWORD_TRY, KEYWORD_TYPEDEF, KEYWORD_TYPENAME, + KEYWORD_UINT, KEYWORD_ULONG, KEYWORD_UNION, KEYWORD_UNSIGNED, KEYWORD_USHORT, + KEYWORD_USING, + KEYWORD_VIRTUAL, KEYWORD_VOID, KEYWORD_VOLATILE, + KEYWORD_WCHAR_T, KEYWORD_WHILE +} keywordId; + +/* Used to determine whether keyword is valid for the current language and + * what its ID is. + */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; + short isValid [5]; /* indicates languages for which kw is valid */ +} keywordDesc; + +/* Used for reporting the type of object parsed by nextToken (). + */ +typedef enum eTokenType { + TOKEN_NONE, /* none */ + TOKEN_ARGS, /* a parenthetical pair and its contents */ + TOKEN_BRACE_CLOSE, + TOKEN_BRACE_OPEN, + TOKEN_COLON, /* the colon character */ + TOKEN_COMMA, /* the comma character */ + TOKEN_DOUBLE_COLON, /* double colon indicates nested-name-specifier */ + TOKEN_KEYWORD, + TOKEN_NAME, /* an unknown name */ + TOKEN_PACKAGE, /* a Java package name */ + TOKEN_PAREN_NAME, /* a single name in parentheses */ + TOKEN_SEMICOLON, /* the semicolon character */ + TOKEN_SPEC, /* a storage class specifier, qualifier, type, etc. */ + TOKEN_COUNT +} tokenType; + +/* This describes the scoping of the current statement. 
+ */ +typedef enum eTagScope { + SCOPE_GLOBAL, /* no storage class specified */ + SCOPE_STATIC, /* static storage class */ + SCOPE_EXTERN, /* external storage class */ + SCOPE_FRIEND, /* declares access only */ + SCOPE_TYPEDEF, /* scoping depends upon context */ + SCOPE_COUNT +} tagScope; + +typedef enum eDeclaration { + DECL_NONE, + DECL_BASE, /* base type (default) */ + DECL_CLASS, + DECL_ENUM, + DECL_EVENT, + DECL_FUNCTION, + DECL_IGNORE, /* non-taggable "declaration" */ + DECL_INTERFACE, + DECL_NAMESPACE, + DECL_NOMANGLE, /* C++ name demangling block */ + DECL_PACKAGE, + DECL_PROGRAM, /* Vera program */ + DECL_STRUCT, + DECL_TASK, /* Vera task */ + DECL_UNION, + DECL_COUNT +} declType; + +typedef enum eVisibilityType { + ACCESS_UNDEFINED, + ACCESS_LOCAL, + ACCESS_PRIVATE, + ACCESS_PROTECTED, + ACCESS_PUBLIC, + ACCESS_DEFAULT, /* Java-specific */ + ACCESS_COUNT +} accessType; + +/* Information about the parent class of a member (if any). + */ +typedef struct sMemberInfo { + accessType access; /* access of current statement */ + accessType accessDefault; /* access default for current statement */ +} memberInfo; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString* name; /* the name of the token */ + unsigned long lineNumber; /* line number of tag */ + fpos_t filePosition; /* file position of line containing name */ +} tokenInfo; + +typedef enum eImplementation { + IMP_DEFAULT, + IMP_ABSTRACT, + IMP_VIRTUAL, + IMP_PURE_VIRTUAL, + IMP_COUNT +} impType; + +/* Describes the statement currently undergoing analysis. + */ +typedef struct sStatementInfo { + tagScope scope; + declType declaration; /* specifier associated with TOKEN_SPEC */ + boolean gotName; /* was a name parsed yet? */ + boolean haveQualifyingName; /* do we have a name we are considering? */ + boolean gotParenName; /* was a name inside parentheses parsed yet? */ + boolean gotArgs; /* was a list of parameters parsed yet? */ + boolean isPointer; /* is 'name' a pointer? 
*/ + boolean inFunction; /* are we inside of a function? */ + boolean assignment; /* have we handled an '='? */ + boolean notVariable; /* has a variable declaration been disqualified ? */ + impType implementation; /* abstract or concrete implementation? */ + unsigned int tokenIndex; /* currently active token */ + tokenInfo* token [(int) NumTokens]; + tokenInfo* context; /* accumulated scope of current statement */ + tokenInfo* blockName; /* name of current block */ + memberInfo member; /* information regarding parent class/struct */ + vString* parentClasses; /* parent classes */ + struct sStatementInfo *parent; /* statement we are nested within */ +} statementInfo; + +/* Describes the type of tag being generated. + */ +typedef enum eTagType { + TAG_UNDEFINED, + TAG_CLASS, /* class name */ + TAG_ENUM, /* enumeration name */ + TAG_ENUMERATOR, /* enumerator (enumeration value) */ + TAG_EVENT, /* event */ + TAG_FIELD, /* field (Java) */ + TAG_FUNCTION, /* function definition */ + TAG_INTERFACE, /* interface declaration */ + TAG_LOCAL, /* local variable definition */ + TAG_MEMBER, /* structure, class or interface member */ + TAG_METHOD, /* method declaration */ + TAG_NAMESPACE, /* namespace name */ + TAG_PACKAGE, /* package name */ + TAG_PROGRAM, /* program name */ + TAG_PROPERTY, /* property name */ + TAG_PROTOTYPE, /* function prototype or declaration */ + TAG_STRUCT, /* structure name */ + TAG_TASK, /* task name */ + TAG_TYPEDEF, /* typedef name */ + TAG_UNION, /* union name */ + TAG_VARIABLE, /* variable definition */ + TAG_EXTERN_VAR, /* external variable declaration */ + TAG_COUNT /* must be last */ +} tagType; + +typedef struct sParenInfo { + boolean isPointer; + boolean isParamList; + boolean isKnrParamList; + boolean isNameCandidate; + boolean invalidContents; + boolean nestedArgs; + unsigned int parameterCount; +} parenInfo; + +/* +* DATA DEFINITIONS +*/ + +static jmp_buf Exception; + +static langType Lang_c; +static langType Lang_cpp; +static langType 
Lang_csharp; +static langType Lang_java; +static langType Lang_vera; +static vString *Signature; +static boolean CollectingSignature; + +/* Number used to uniquely identify anonymous structs and unions. */ +static int AnonymousID = 0; + +/* Used to index into the CKinds table. */ +typedef enum { + CK_UNDEFINED = -1, + CK_CLASS, CK_DEFINE, CK_ENUMERATOR, CK_FUNCTION, + CK_ENUMERATION, CK_LOCAL, CK_MEMBER, CK_NAMESPACE, CK_PROTOTYPE, + CK_STRUCT, CK_TYPEDEF, CK_UNION, CK_VARIABLE, + CK_EXTERN_VARIABLE +} cKind; + +static kindOption CKinds [] = { + { TRUE, 'c', "class", "classes"}, + { TRUE, 'd', "macro", "macro definitions"}, + { TRUE, 'e', "enumerator", "enumerators (values inside an enumeration)"}, + { TRUE, 'f', "function", "function definitions"}, + { TRUE, 'g', "enum", "enumeration names"}, + { FALSE, 'l', "local", "local variables"}, + { TRUE, 'm', "member", "class, struct, and union members"}, + { TRUE, 'n', "namespace", "namespaces"}, + { FALSE, 'p', "prototype", "function prototypes"}, + { TRUE, 's', "struct", "structure names"}, + { TRUE, 't', "typedef", "typedefs"}, + { TRUE, 'u', "union", "union names"}, + { TRUE, 'v', "variable", "variable definitions"}, + { FALSE, 'x', "externvar", "external and forward variable declarations"}, +}; + +typedef enum { + CSK_UNDEFINED = -1, + CSK_CLASS, CSK_DEFINE, CSK_ENUMERATOR, CSK_EVENT, CSK_FIELD, + CSK_ENUMERATION, CSK_INTERFACE, CSK_LOCAL, CSK_METHOD, + CSK_NAMESPACE, CSK_PROPERTY, CSK_STRUCT, CSK_TYPEDEF +} csharpKind; + +static kindOption CsharpKinds [] = { + { TRUE, 'c', "class", "classes"}, + { TRUE, 'd', "macro", "macro definitions"}, + { TRUE, 'e', "enumerator", "enumerators (values inside an enumeration)"}, + { TRUE, 'E', "event", "events"}, + { TRUE, 'f', "field", "fields"}, + { TRUE, 'g', "enum", "enumeration names"}, + { TRUE, 'i', "interface", "interfaces"}, + { FALSE, 'l', "local", "local variables"}, + { TRUE, 'm', "method", "methods"}, + { TRUE, 'n', "namespace", "namespaces"}, + { TRUE, 'p', 
"property", "properties"}, + { TRUE, 's', "struct", "structure names"}, + { TRUE, 't', "typedef", "typedefs"}, +}; + +/* Used to index into the JavaKinds table. */ +typedef enum { + JK_UNDEFINED = -1, + JK_CLASS, JK_ENUM_CONSTANT, JK_FIELD, JK_ENUM, JK_INTERFACE, + JK_LOCAL, JK_METHOD, JK_PACKAGE, JK_ACCESS, JK_CLASS_PREFIX +} javaKind; + +static kindOption JavaKinds [] = { + { TRUE, 'c', "class", "classes"}, + { TRUE, 'e', "enum constant", "enum constants"}, + { TRUE, 'f', "field", "fields"}, + { TRUE, 'g', "enum", "enum types"}, + { TRUE, 'i', "interface", "interfaces"}, + { FALSE, 'l', "local", "local variables"}, + { TRUE, 'm', "method", "methods"}, + { TRUE, 'p', "package", "packages"}, +}; + +/* Used to index into the VeraKinds table. */ +typedef enum { + VK_UNDEFINED = -1, + VK_CLASS, VK_DEFINE, VK_ENUMERATOR, VK_FUNCTION, + VK_ENUMERATION, VK_LOCAL, VK_MEMBER, VK_PROGRAM, VK_PROTOTYPE, + VK_TASK, VK_TYPEDEF, VK_VARIABLE, + VK_EXTERN_VARIABLE +} veraKind; + +static kindOption VeraKinds [] = { + { TRUE, 'c', "class", "classes"}, + { TRUE, 'd', "macro", "macro definitions"}, + { TRUE, 'e', "enumerator", "enumerators (values inside an enumeration)"}, + { TRUE, 'f', "function", "function definitions"}, + { TRUE, 'g', "enum", "enumeration names"}, + { FALSE, 'l', "local", "local variables"}, + { TRUE, 'm', "member", "class, struct, and union members"}, + { TRUE, 'p', "program", "programs"}, + { FALSE, 'P', "prototype", "function prototypes"}, + { TRUE, 't', "task", "tasks"}, + { TRUE, 'T', "typedef", "typedefs"}, + { TRUE, 'v', "variable", "variable definitions"}, + { FALSE, 'x', "externvar", "external variable declarations"} +}; + +static const keywordDesc KeywordTable [] = { + /* C++ */ + /* ANSI C | C# Java */ + /* | | | | Vera */ + /* keyword keyword ID | | | | | */ + { "__attribute__", KEYWORD_ATTRIBUTE, { 1, 1, 1, 0, 0 } }, + { "abstract", KEYWORD_ABSTRACT, { 0, 0, 1, 1, 0 } }, + { "bad_state", KEYWORD_BAD_STATE, { 0, 0, 0, 0, 1 } }, + { "bad_trans", 
KEYWORD_BAD_TRANS, { 0, 0, 0, 0, 1 } }, + { "bind", KEYWORD_BIND, { 0, 0, 0, 0, 1 } }, + { "bind_var", KEYWORD_BIND_VAR, { 0, 0, 0, 0, 1 } }, + { "bit", KEYWORD_BIT, { 0, 0, 0, 0, 1 } }, + { "boolean", KEYWORD_BOOLEAN, { 0, 0, 0, 1, 0 } }, + { "byte", KEYWORD_BYTE, { 0, 0, 0, 1, 0 } }, + { "case", KEYWORD_CASE, { 1, 1, 1, 1, 0 } }, + { "catch", KEYWORD_CATCH, { 0, 1, 1, 0, 0 } }, + { "char", KEYWORD_CHAR, { 1, 1, 1, 1, 0 } }, + { "class", KEYWORD_CLASS, { 0, 1, 1, 1, 1 } }, + { "const", KEYWORD_CONST, { 1, 1, 1, 1, 0 } }, + { "constraint", KEYWORD_CONSTRAINT, { 0, 0, 0, 0, 1 } }, + { "coverage_block", KEYWORD_COVERAGE_BLOCK, { 0, 0, 0, 0, 1 } }, + { "coverage_def", KEYWORD_COVERAGE_DEF, { 0, 0, 0, 0, 1 } }, + { "do", KEYWORD_DO, { 1, 1, 1, 1, 0 } }, + { "default", KEYWORD_DEFAULT, { 1, 1, 1, 1, 0 } }, + { "delegate", KEYWORD_DELEGATE, { 0, 0, 1, 0, 0 } }, + { "delete", KEYWORD_DELETE, { 0, 1, 0, 0, 0 } }, + { "double", KEYWORD_DOUBLE, { 1, 1, 1, 1, 0 } }, + { "else", KEYWORD_ELSE, { 1, 1, 1, 1, 0 } }, + { "enum", KEYWORD_ENUM, { 1, 1, 1, 1, 1 } }, + { "event", KEYWORD_EVENT, { 0, 0, 1, 0, 1 } }, + { "explicit", KEYWORD_EXPLICIT, { 0, 1, 1, 0, 0 } }, + { "extends", KEYWORD_EXTENDS, { 0, 0, 0, 1, 1 } }, + { "extern", KEYWORD_EXTERN, { 1, 1, 1, 0, 1 } }, + { "final", KEYWORD_FINAL, { 0, 0, 0, 1, 0 } }, + { "float", KEYWORD_FLOAT, { 1, 1, 1, 1, 0 } }, + { "for", KEYWORD_FOR, { 1, 1, 1, 1, 0 } }, + { "foreach", KEYWORD_FOREACH, { 0, 0, 1, 0, 0 } }, + { "friend", KEYWORD_FRIEND, { 0, 1, 0, 0, 0 } }, + { "function", KEYWORD_FUNCTION, { 0, 0, 0, 0, 1 } }, + { "goto", KEYWORD_GOTO, { 1, 1, 1, 1, 0 } }, + { "if", KEYWORD_IF, { 1, 1, 1, 1, 0 } }, + { "implements", KEYWORD_IMPLEMENTS, { 0, 0, 0, 1, 0 } }, + { "import", KEYWORD_IMPORT, { 0, 0, 0, 1, 0 } }, + { "inline", KEYWORD_INLINE, { 0, 1, 0, 0, 0 } }, + { "inout", KEYWORD_INOUT, { 0, 0, 0, 0, 1 } }, + { "input", KEYWORD_INPUT, { 0, 0, 0, 0, 1 } }, + { "int", KEYWORD_INT, { 1, 1, 1, 1, 0 } }, + { "integer", KEYWORD_INTEGER, 
{ 0, 0, 0, 0, 1 } }, + { "interface", KEYWORD_INTERFACE, { 0, 0, 1, 1, 1 } }, + { "internal", KEYWORD_INTERNAL, { 0, 0, 1, 0, 0 } }, + { "local", KEYWORD_LOCAL, { 0, 0, 0, 0, 1 } }, + { "long", KEYWORD_LONG, { 1, 1, 1, 1, 0 } }, + { "m_bad_state", KEYWORD_M_BAD_STATE, { 0, 0, 0, 0, 1 } }, + { "m_bad_trans", KEYWORD_M_BAD_TRANS, { 0, 0, 0, 0, 1 } }, + { "m_state", KEYWORD_M_STATE, { 0, 0, 0, 0, 1 } }, + { "m_trans", KEYWORD_M_TRANS, { 0, 0, 0, 0, 1 } }, + { "mutable", KEYWORD_MUTABLE, { 0, 1, 0, 0, 0 } }, + { "namespace", KEYWORD_NAMESPACE, { 0, 1, 1, 0, 0 } }, + { "native", KEYWORD_NATIVE, { 0, 0, 0, 1, 0 } }, + { "new", KEYWORD_NEW, { 0, 1, 1, 1, 0 } }, + { "newcov", KEYWORD_NEWCOV, { 0, 0, 0, 0, 1 } }, + { "operator", KEYWORD_OPERATOR, { 0, 1, 1, 0, 0 } }, + { "output", KEYWORD_OUTPUT, { 0, 0, 0, 0, 1 } }, + { "overload", KEYWORD_OVERLOAD, { 0, 1, 0, 0, 0 } }, + { "override", KEYWORD_OVERRIDE, { 0, 0, 1, 0, 0 } }, + { "package", KEYWORD_PACKAGE, { 0, 0, 0, 1, 0 } }, + { "packed", KEYWORD_PACKED, { 0, 0, 0, 0, 1 } }, + { "port", KEYWORD_PORT, { 0, 0, 0, 0, 1 } }, + { "private", KEYWORD_PRIVATE, { 0, 1, 1, 1, 0 } }, + { "program", KEYWORD_PROGRAM, { 0, 0, 0, 0, 1 } }, + { "protected", KEYWORD_PROTECTED, { 0, 1, 1, 1, 1 } }, + { "public", KEYWORD_PUBLIC, { 0, 1, 1, 1, 1 } }, + { "register", KEYWORD_REGISTER, { 1, 1, 0, 0, 0 } }, + { "return", KEYWORD_RETURN, { 1, 1, 1, 1, 0 } }, + { "shadow", KEYWORD_SHADOW, { 0, 0, 0, 0, 1 } }, + { "short", KEYWORD_SHORT, { 1, 1, 1, 1, 0 } }, + { "signed", KEYWORD_SIGNED, { 1, 1, 0, 0, 0 } }, + { "state", KEYWORD_STATE, { 0, 0, 0, 0, 1 } }, + { "static", KEYWORD_STATIC, { 1, 1, 1, 1, 1 } }, + { "string", KEYWORD_STRING, { 0, 0, 1, 0, 1 } }, + { "struct", KEYWORD_STRUCT, { 1, 1, 1, 0, 0 } }, + { "switch", KEYWORD_SWITCH, { 1, 1, 1, 1, 0 } }, + { "synchronized", KEYWORD_SYNCHRONIZED, { 0, 0, 0, 1, 0 } }, + { "task", KEYWORD_TASK, { 0, 0, 0, 0, 1 } }, + { "template", KEYWORD_TEMPLATE, { 0, 1, 0, 0, 0 } }, + { "this", KEYWORD_THIS, { 
0, 1, 1, 1, 0 } }, + { "throw", KEYWORD_THROW, { 0, 1, 1, 1, 0 } }, + { "throws", KEYWORD_THROWS, { 0, 0, 0, 1, 0 } }, + { "trans", KEYWORD_TRANS, { 0, 0, 0, 0, 1 } }, + { "transition", KEYWORD_TRANSITION, { 0, 0, 0, 0, 1 } }, + { "transient", KEYWORD_TRANSIENT, { 0, 0, 0, 1, 0 } }, + { "try", KEYWORD_TRY, { 0, 1, 1, 0, 0 } }, + { "typedef", KEYWORD_TYPEDEF, { 1, 1, 1, 0, 1 } }, + { "typename", KEYWORD_TYPENAME, { 0, 1, 0, 0, 0 } }, + { "uint", KEYWORD_UINT, { 0, 0, 1, 0, 0 } }, + { "ulong", KEYWORD_ULONG, { 0, 0, 1, 0, 0 } }, + { "union", KEYWORD_UNION, { 1, 1, 0, 0, 0 } }, + { "unsigned", KEYWORD_UNSIGNED, { 1, 1, 1, 0, 0 } }, + { "ushort", KEYWORD_USHORT, { 0, 0, 1, 0, 0 } }, + { "using", KEYWORD_USING, { 0, 1, 1, 0, 0 } }, + { "virtual", KEYWORD_VIRTUAL, { 0, 1, 1, 0, 1 } }, + { "void", KEYWORD_VOID, { 1, 1, 1, 1, 1 } }, + { "volatile", KEYWORD_VOLATILE, { 1, 1, 1, 1, 0 } }, + { "wchar_t", KEYWORD_WCHAR_T, { 1, 1, 1, 0, 0 } }, + { "while", KEYWORD_WHILE, { 1, 1, 1, 1, 0 } } +}; + +/* +* FUNCTION PROTOTYPES +*/ +static void createTags (const unsigned int nestLevel, statementInfo *const parent); + +/* +* FUNCTION DEFINITIONS +*/ + +extern boolean includingDefineTags (void) +{ + return CKinds [CK_DEFINE].enabled; +} + +/* +* Token management +*/ + +static void initToken (tokenInfo* const token) +{ + token->type = TOKEN_NONE; + token->keyword = KEYWORD_NONE; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + vStringClear (token->name); +} + +static void advanceToken (statementInfo* const st) +{ + if (st->tokenIndex >= (unsigned int) NumTokens - 1) + st->tokenIndex = 0; + else + ++st->tokenIndex; + initToken (st->token [st->tokenIndex]); +} + +static tokenInfo *prevToken (const statementInfo *const st, unsigned int n) +{ + unsigned int tokenIndex; + unsigned int num = (unsigned int) NumTokens; + Assert (n < num); + tokenIndex = (st->tokenIndex + num - n) % num; + return st->token [tokenIndex]; +} + +static void setToken 
(statementInfo *const st, const tokenType type)
+{
+    tokenInfo *token;
+    token = activeToken (st);
+    initToken (token);
+    token->type = type;
+}
+/*  Steps the current slot index of st->token [] back by one, wrapping from
+ *  slot 0 to slot NumTokens - 1, and resets the token that is active
+ *  afterwards to TOKEN_NONE through setToken ().
+ */
+static void retardToken (statementInfo *const st)
+{
+    if (st->tokenIndex == 0)
+        st->tokenIndex = (unsigned int) NumTokens - 1;
+    else
+        --st->tokenIndex;
+    setToken (st, TOKEN_NONE);
+}
+/*  Allocates one tokenInfo with xMalloc, attaches a new vString as its name
+ *  and initialises it with initToken (). deleteToken () is the matching
+ *  release routine.
+ */
+static tokenInfo *newToken (void)
+{
+    tokenInfo *const token = xMalloc (1, tokenInfo);
+    token->name = vStringNew ();
+    initToken (token);
+    return token;
+}
+/*  Releases a token: its name is passed to vStringDelete () and the token
+ *  itself to eFree (). A NULL token is ignored.
+ */
+static void deleteToken (tokenInfo *const token)
+{
+    if (token != NULL)
+    {
+        vStringDelete (token->name);
+        eFree (token);
+    }
+}
+/*  Returns the display string for an access value by using the value as an
+ *  index into a fixed table. The assertions check that the table has
+ *  ACCESS_COUNT entries and that the value is below ACCESS_COUNT.
+ */
+static const char *accessString (const accessType access)
+{
+    static const char *const names [] = {
+        "?", "local", "private", "protected", "public", "default"
+    };
+    Assert (sizeof (names) / sizeof (names [0]) == ACCESS_COUNT);
+    Assert ((int) access < ACCESS_COUNT);
+    return names [(int) access];
+}
+/*  Returns the display string for an implementation value by using the
+ *  value as an index into a fixed table. The assertions check that the
+ *  table has IMP_COUNT entries and that the value is below IMP_COUNT.
+ */
+static const char *implementationString (const impType imp)
+{
+    static const char *const names [] ={
+        "?", "abstract", "virtual", "pure virtual"
+    };
+    Assert (sizeof (names) / sizeof (names [0]) == IMP_COUNT);
+    Assert ((int) imp < IMP_COUNT);
+    return names [(int) imp];
+}
+
+/*
+*   Debugging functions
+*/
+
+#ifdef DEBUG
+
+#define boolString(c) ((c) ? "TRUE" : "FALSE")
+/*  Debug only: returns the display string for a token type, indexing a
+ *  table that must have TOKEN_COUNT entries (asserted).
+ */
+static const char *tokenString (const tokenType type)
+{
+    static const char *const names [] = {
+        "none", "args", "}", "{", "colon", "comma", "double colon", "keyword",
+        "name", "package", "paren-name", "semicolon", "specifier"
+    };
+    Assert (sizeof (names) / sizeof (names [0]) == TOKEN_COUNT);
+    Assert ((int) type < TOKEN_COUNT);
+    return names [(int) type];
+}
+/*  Debug only: returns the display string for a tag scope, indexing a table
+ *  that must have SCOPE_COUNT entries (asserted).
+ */
+static const char *scopeString (const tagScope scope)
+{
+    static const char *const names [] = {
+        "global", "static", "extern", "friend", "typedef"
+    };
+    Assert (sizeof (names) / sizeof (names [0]) == SCOPE_COUNT);
+    Assert ((int) scope < SCOPE_COUNT);
+    return names [(int) scope];
+}
+/*  Debug only: returns the display string for a declaration type, indexing
+ *  a table that must have DECL_COUNT entries (asserted).
+ */
+static const char *declString (const declType declaration)
+{
+    static const char *const names [] = {
+        "?", "base", "class", "enum", "event", "function", "ignore",
+        "interface", "namespace", "no mangle", "package", "program",
+        "struct", "task", "union",
+    };
+    Assert (sizeof (names) / sizeof (names [0]) == DECL_COUNT);
+    Assert ((int) declaration < DECL_COUNT);
+    return names [(int) declaration];
+}
+/*  Debug only: returns the name of the first KeywordTable entry whose id
+ *  equals keyword, found by a linear search; returns the string none when
+ *  no entry matches.
+ */
+static const char *keywordString (const keywordId keyword)
+{
+    const size_t count = sizeof (KeywordTable) / sizeof (KeywordTable [0]);
+    const char *name = "none";
+    size_t i;
+    for (i = 0 ; i < count ; ++i)
+    {
+        const keywordDesc *p = &KeywordTable [i];
+        if (p->id == keyword)
+        {
+            name = p->name;
+            break;
+        }
+    }
+    return name;
+}
+/*  Debug only: prints one line describing a token. Name tokens show their
+ *  type and name, keyword tokens their type and keyword text, all other
+ *  tokens only their type; the line number is printed in every case.
+ */
+static void __unused__ pt (tokenInfo *const token)
+{
+    if (isType (token, TOKEN_NAME))
+        printf ("type: %-12s: %-13s line: %lu\n",
+            tokenString (token->type), vStringValue (token->name),
+            token->lineNumber);
+    else if (isType (token, TOKEN_KEYWORD))
+        printf ("type: %-12s: %-13s line: %lu\n",
+            tokenString (token->type), keywordString (token->keyword),
+            token->lineNumber);
+    else
+        printf ("type: %-12s line: %lu\n",
+            tokenString (token->type), token->lineNumber);
+}
+/*  Debug only: prints the state of a statement: scope, declaration, the
+ *  gotName / gotParenName / haveQualifyingName flags, current and default
+ *  member access, the active token, the NumTokens - 1 tokens before it,
+ *  and the context token.
+ */
+static void __unused__ ps (statementInfo *const st)
+{
+    unsigned int i;
+    printf ("scope: %s decl: %s gotName: %s gotParenName: %s\n",
+        scopeString (st->scope), declString (st->declaration),
+        boolString (st->gotName), boolString (st->gotParenName));
+    printf ("haveQualifyingName: %s\n", boolString (st->haveQualifyingName));
+    printf ("access: %s default: %s\n", accessString (st->member.access),
+        accessString (st->member.accessDefault));
+    printf ("token : ");
+    pt (activeToken (st));
+    for (i = 1 ; i < (unsigned int) NumTokens ; ++i)
+    {
+        printf ("prev %u : ", i);
+        pt (prevToken (st, i));
+    }
+    printf ("context: ");
+    pt (st->context);
+}
+
+#endif
+
+/*
+*   Statement management
+*/
+/*  Returns TRUE when the keyword of the token is one of class, enum,
+ *  interface, namespace, struct or union; FALSE for every other keyword.
+ */
+static boolean isContextualKeyword (const tokenInfo *const token)
+{
+    boolean result;
+    switch (token->keyword)
+    {
+        case KEYWORD_CLASS:
+        case KEYWORD_ENUM:
+        case KEYWORD_INTERFACE:
+        case KEYWORD_NAMESPACE:
+        case KEYWORD_STRUCT:
+        case KEYWORD_UNION:
+            result = TRUE;
+            break;
+
+        default: result = FALSE; break;
+    }
+    return result;
+}
+/*  Returns TRUE when st is not NULL and its declaration is a class, enum,
+ *  interface, namespace, struct or union. A NULL st yields FALSE.
+ */
+static boolean isContextualStatement (const statementInfo *const st)
+{
+    boolean result = FALSE;
+    if (st != NULL) switch (st->declaration)
+    {
+        case DECL_CLASS:
+        case DECL_ENUM:
+        case DECL_INTERFACE:
+        case DECL_NAMESPACE:
+        case DECL_STRUCT:
+        case DECL_UNION:
+            result = TRUE;
+            break;
+
+        default: result = FALSE; break;
+    }
+    return result;
+}
+/*  Returns TRUE when the statement has a name token as its context, or when
+ *  it has a parent for which isContextualStatement () is TRUE.
+ */
+static boolean isMember (const statementInfo *const st)
+{
+    boolean result;
+    if (isType (st->context, TOKEN_NAME))
+        result = TRUE;
+    else
+        result = (boolean)
+            (st->parent != NULL && isContextualStatement (st->parent));
+    return result;
+}
+/*  Derives the default member access of a statement from the declaration
+ *  of its parent and stores it as both the default and the current access.
+ *  Without a parent, or for a parent declaration not listed in the switch,
+ *  the access is ACCESS_UNDEFINED.
+ */
+static void initMemberInfo (statementInfo *const st)
+{
+    accessType accessDefault = ACCESS_UNDEFINED;
+
+    if (st->parent != NULL) switch (st->parent->declaration)
+    {
+        case DECL_ENUM:
+            accessDefault = (isLanguage (Lang_java) ? 
ACCESS_PUBLIC : ACCESS_UNDEFINED);
+            break;
+        case DECL_NAMESPACE:
+            accessDefault = ACCESS_UNDEFINED;
+            break;
+
+        case DECL_CLASS:
+            if (isLanguage (Lang_java)) /* class members: ACCESS_DEFAULT for Java, ACCESS_PRIVATE for every other language */
+                accessDefault = ACCESS_DEFAULT;
+            else
+                accessDefault = ACCESS_PRIVATE;
+            break;
+
+        case DECL_INTERFACE:
+        case DECL_STRUCT:
+        case DECL_UNION:
+            accessDefault = ACCESS_PUBLIC;
+            break;
+
+        default: break;
+    }
+    st->member.accessDefault = accessDefault; /* the default and the current access start out equal */
+    st->member.access = accessDefault;
+}
+/*  Resets the parsing state of a statement so it can be reused.
+ *
+ *  With partial == FALSE the scope, the declaration, the block name and the
+ *  current member access are reset as well; with partial == TRUE those four
+ *  are left as they are. All flags, every slot of st->token [], the context
+ *  token and the parentClasses string are cleared in both cases.
+ */
+static void reinitStatement (statementInfo *const st, const boolean partial)
+{
+    unsigned int i;
+
+    if (! partial)
+    {
+        st->scope = SCOPE_GLOBAL;
+        if (isContextualStatement (st->parent)) /* DECL_BASE inside a contextual parent, DECL_NONE otherwise */
+            st->declaration = DECL_BASE;
+        else
+            st->declaration = DECL_NONE;
+    }
+    st->gotParenName = FALSE;
+    st->isPointer = FALSE;
+    st->inFunction = FALSE;
+    st->assignment = FALSE;
+    st->notVariable = FALSE;
+    st->implementation = IMP_DEFAULT;
+    st->gotArgs = FALSE;
+    st->gotName = FALSE;
+    st->haveQualifyingName = FALSE;
+    st->tokenIndex = 0;
+
+    if (st->parent != NULL) /* inFunction was cleared above; copy it from the parent when there is one */
+        st->inFunction = st->parent->inFunction;
+
+    for (i = 0 ; i < (unsigned int) NumTokens ; ++i)
+        initToken (st->token [i]);
+
+    initToken (st->context);
+
+    /* Keep the block name, so that a variable following after a comma will
+     * still have the structure name.
+     */
+    if (! partial)
+        initToken (st->blockName);
+
+    vStringClear (st->parentClasses);
+
+    /* Init member info.
+     */
+    if (! 
partial)
+        st->member.access = st->member.accessDefault;
+}
+/*  Prepares a statement for first use: records its parent, derives the
+ *  member access defaults from that parent with initMemberInfo () and then
+ *  performs a full (non-partial) reinitStatement ().
+ */
+static void initStatement (statementInfo *const st, statementInfo *const parent)
+{
+    st->parent = parent;
+    initMemberInfo (st);
+    reinitStatement (st, FALSE);
+}
+
+/*
+*   Tag generation functions
+*/
+static cKind cTagKind (const tagType type)
+{   /* Maps a tag type to the matching cKind value. A tag type without a
+     * case below triggers the assertion and yields CK_UNDEFINED.
+     */
+    cKind result = CK_UNDEFINED;
+    switch (type)
+    {
+        case TAG_CLASS: result = CK_CLASS; break;
+        case TAG_ENUM: result = CK_ENUMERATION; break;
+        case TAG_ENUMERATOR: result = CK_ENUMERATOR; break;
+        case TAG_FUNCTION: result = CK_FUNCTION; break;
+        case TAG_LOCAL: result = CK_LOCAL; break;
+        case TAG_MEMBER: result = CK_MEMBER; break;
+        case TAG_NAMESPACE: result = CK_NAMESPACE; break;
+        case TAG_PROTOTYPE: result = CK_PROTOTYPE; break;
+        case TAG_STRUCT: result = CK_STRUCT; break;
+        case TAG_TYPEDEF: result = CK_TYPEDEF; break;
+        case TAG_UNION: result = CK_UNION; break;
+        case TAG_VARIABLE: result = CK_VARIABLE; break;
+        case TAG_EXTERN_VAR: result = CK_EXTERN_VARIABLE; break;
+
+        default: Assert ("Bad C tag type" == NULL); break; /* the condition is always false: a string literal is never NULL */
+    }
+    return result;
+}
+/*  Maps a tag type to the matching csharpKind value. A tag type without a
+ *  case below triggers the assertion and yields CSK_UNDEFINED.
+ */
+static csharpKind csharpTagKind (const tagType type)
+{
+    csharpKind result = CSK_UNDEFINED;
+    switch (type)
+    {
+        case TAG_CLASS: result = CSK_CLASS; break;
+        case TAG_ENUM: result = CSK_ENUMERATION; break;
+        case TAG_ENUMERATOR: result = CSK_ENUMERATOR; break;
+        case TAG_EVENT: result = CSK_EVENT; break;
+        case TAG_FIELD: result = CSK_FIELD ; break;
+        case TAG_INTERFACE: result = CSK_INTERFACE; break;
+        case TAG_LOCAL: result = CSK_LOCAL; break;
+        case TAG_METHOD: result = CSK_METHOD; break;
+        case TAG_NAMESPACE: result = CSK_NAMESPACE; break;
+        case TAG_PROPERTY: result = CSK_PROPERTY; break;
+        case TAG_STRUCT: result = CSK_STRUCT; break;
+        case TAG_TYPEDEF: result = CSK_TYPEDEF; break;
+
+        default: Assert ("Bad C# tag type" == NULL); break; /* the condition is always false: a string literal is never NULL */
+    }
+    return result;
+}
+/*  Maps a tag type to the matching javaKind value. A tag type without a
+ *  case below triggers the assertion and yields JK_UNDEFINED.
+ */
+static javaKind javaTagKind (const tagType type)
+{
+    javaKind result = JK_UNDEFINED;
+    switch (type)
+    {
+        case TAG_CLASS: result = 
JK_CLASS; break;
+        case TAG_ENUM: result = JK_ENUM; break;
+        case TAG_ENUMERATOR: result = JK_ENUM_CONSTANT; break;
+        case TAG_FIELD: result = JK_FIELD; break;
+        case TAG_INTERFACE: result = JK_INTERFACE; break;
+        case TAG_LOCAL: result = JK_LOCAL; break;
+        case TAG_METHOD: result = JK_METHOD; break;
+        case TAG_PACKAGE: result = JK_PACKAGE; break;
+
+        default: Assert ("Bad Java tag type" == NULL); break; /* the condition is always false: a string literal is never NULL */
+    }
+    return result;
+}
+/*  Maps a tag type to the matching veraKind value. A tag type without a
+ *  case below triggers the assertion and yields VK_UNDEFINED.
+ */
+static veraKind veraTagKind (const tagType type) {
+    veraKind result = VK_UNDEFINED;
+    switch (type)
+    {
+        case TAG_CLASS: result = VK_CLASS; break;
+        case TAG_ENUM: result = VK_ENUMERATION; break;
+        case TAG_ENUMERATOR: result = VK_ENUMERATOR; break;
+        case TAG_FUNCTION: result = VK_FUNCTION; break;
+        case TAG_LOCAL: result = VK_LOCAL; break;
+        case TAG_MEMBER: result = VK_MEMBER; break;
+        case TAG_PROGRAM: result = VK_PROGRAM; break;
+        case TAG_PROTOTYPE: result = VK_PROTOTYPE; break;
+        case TAG_TASK: result = VK_TASK; break;
+        case TAG_TYPEDEF: result = VK_TYPEDEF; break;
+        case TAG_VARIABLE: result = VK_VARIABLE; break;
+        case TAG_EXTERN_VAR: result = VK_EXTERN_VARIABLE; break;
+
+        default: Assert ("Bad Vera tag type" == NULL); break; /* the condition is always false: a string literal is never NULL */
+    }
+    return result;
+}
+/*  Returns the name field of the kind table entry for a tag type. The table
+ *  is chosen by the current language: CsharpKinds, JavaKinds or VeraKinds,
+ *  and CKinds for every other language.
+ *  NOTE(review): the index is whatever the kind mapper returns, including
+ *  its UNDEFINED value for unmapped types; confirm that value is a valid
+ *  index when assertions are compiled out.
+ */
+static const char *tagName (const tagType type)
+{
+    const char* result;
+    if (isLanguage (Lang_csharp))
+        result = CsharpKinds [csharpTagKind (type)].name;
+    else if (isLanguage (Lang_java))
+        result = JavaKinds [javaTagKind (type)].name;
+    else if (isLanguage (Lang_vera))
+        result = VeraKinds [veraTagKind (type)].name;
+    else
+        result = CKinds [cTagKind (type)].name;
+    return result;
+}
+/*  Returns the letter field of the kind table entry for a tag type, using
+ *  the same per-language table selection as tagName ().
+ */
+static int tagLetter (const tagType type)
+{
+    int result;
+    if (isLanguage (Lang_csharp))
+        result = CsharpKinds [csharpTagKind (type)].letter;
+    else if (isLanguage (Lang_java))
+        result = JavaKinds [javaTagKind (type)].letter;
+    else if (isLanguage (Lang_vera))
+        result = VeraKinds [veraTagKind (type)].letter;
+    else
+        result = CKinds [cTagKind (type)].letter;
+    return result;
+}
+
+static boolean includeTag (const tagType type, const boolean isFileScope)
+{   /* Decides whether a tag of this type is to be generated: FALSE for a
+     * file-scope tag while Option.include.fileScope is off, otherwise the
+     * enabled flag of the kind table entry for the current language.
+     */
+    boolean result;
+    if (isFileScope && ! Option.include.fileScope)
+        result = FALSE;
+    else if (isLanguage (Lang_csharp))
+        result = CsharpKinds [csharpTagKind (type)].enabled;
+    else if (isLanguage (Lang_java))
+        result = JavaKinds [javaTagKind (type)].enabled;
+    else if (isLanguage (Lang_vera))
+        result = VeraKinds [veraTagKind (type)].enabled;
+    else
+        result = CKinds [cTagKind (type)].enabled;
+    return result;
+}
+/*  Maps a declaration type to the tag type used for it. A declaration
+ *  without a case below triggers the assertion and yields TAG_UNDEFINED.
+ */
+static tagType declToTagType (const declType declaration)
+{
+    tagType type = TAG_UNDEFINED;
+
+    switch (declaration)
+    {
+        case DECL_CLASS: type = TAG_CLASS; break;
+        case DECL_ENUM: type = TAG_ENUM; break;
+        case DECL_EVENT: type = TAG_EVENT; break;
+        case DECL_FUNCTION: type = TAG_FUNCTION; break;
+        case DECL_INTERFACE: type = TAG_INTERFACE; break;
+        case DECL_NAMESPACE: type = TAG_NAMESPACE; break;
+        case DECL_PROGRAM: type = TAG_PROGRAM; break;
+        case DECL_TASK: type = TAG_TASK; break;
+        case DECL_STRUCT: type = TAG_STRUCT; break;
+        case DECL_UNION: type = TAG_UNION; break;
+
+        default: Assert ("Unexpected declaration" == NULL); break; /* the condition is always false: a string literal is never NULL */
+    }
+    return type;
+}
+/*  Returns the text for the access field of a tag: the word friend for a
+ *  C++ statement whose scope is SCOPE_FRIEND, otherwise the accessString ()
+ *  of the current member access when that is not ACCESS_UNDEFINED,
+ *  otherwise NULL.
+ */
+static const char* accessField (const statementInfo *const st)
+{
+    const char* result = NULL;
+    if (isLanguage (Lang_cpp) && st->scope == SCOPE_FRIEND)
+        result = "friend";
+    else if (st->member.access != ACCESS_UNDEFINED)
+        result = accessString (st->member.access);
+    return result;
+}
+/*  Appends the scope separator of the current language to a string: two
+ *  colons for C and C++, a dot for Java and C#. Nothing is appended for any
+ *  other language.
+ */
+static void addContextSeparator (vString *const scope)
+{
+    if (isLanguage (Lang_c) || isLanguage (Lang_cpp))
+        vStringCatS (scope, "::");
+    else if (isLanguage (Lang_java) || isLanguage (Lang_csharp))
+        vStringCatS (scope, ".");
+}
+/*  Fills the extension fields of a tag entry (signature, scope,
+ *  inheritance, implementation, access and typeRef) from the statement.
+ *
+ *  scope is the scope string of the statement. typeRef is storage owned by
+ *  the caller: when a scope is prepended to the type name, the tag ends up
+ *  pointing at the contents of typeRef, so typeRef has to stay alive until
+ *  the tag entry has been written.
+ */
+static void addOtherFields (tagEntryInfo* const tag, const tagType type,
+                            const statementInfo *const st,
+                            vString *const scope, vString *const typeRef)
+{
+    /* For selected tag types, append an extension flag designating the
+     * parent object in which the tag is defined.
+     */
+    switch (type)
+    {
+        default: break;
+
+        case TAG_FUNCTION:
+        case TAG_METHOD:
+        case TAG_PROTOTYPE:
+            if (vStringLength (Signature) > 0)
+                tag->extensionFields.signature = vStringValue (Signature); /* no break after this statement: execution continues into the cases below */
+        case TAG_CLASS:
+        case TAG_ENUM:
+        case TAG_ENUMERATOR:
+        case TAG_EVENT:
+        case TAG_FIELD:
+        case TAG_INTERFACE:
+        case TAG_MEMBER:
+        case TAG_NAMESPACE:
+        case TAG_PROPERTY:
+        case TAG_STRUCT:
+        case TAG_TASK:
+        case TAG_TYPEDEF:
+        case TAG_UNION:
+            if (vStringLength (scope) > 0 &&
+                (isMember (st) || st->parent->declaration == DECL_NAMESPACE)) /* NOTE(review): st->parent is dereferenced without a NULL check when isMember (st) is FALSE; this looks safe only if a non-empty scope implies a parent - confirm against findScopeHierarchy () */
+            {
+                if (isType (st->context, TOKEN_NAME)) /* a name context is reported with the kind name of TAG_CLASS */
+                    tag->extensionFields.scope [0] = tagName (TAG_CLASS);
+                else
+                    tag->extensionFields.scope [0] =
+                        tagName (declToTagType (parentDecl (st)));
+                tag->extensionFields.scope [1] = vStringValue (scope);
+            }
+            if ((type == TAG_CLASS || type == TAG_INTERFACE ||
+                 type == TAG_STRUCT) && vStringLength (st->parentClasses) > 0)
+            {
+
+                tag->extensionFields.inheritance =
+                    vStringValue (st->parentClasses);
+            }
+            if (st->implementation != IMP_DEFAULT &&
+                (isLanguage (Lang_cpp) || isLanguage (Lang_csharp) ||
+                 isLanguage (Lang_java)))
+            {
+                tag->extensionFields.implementation =
+                    implementationString (st->implementation);
+            }
+            if (isMember (st))
+            {
+                tag->extensionFields.access = accessField (st);
+            }
+            break;
+    }
+
+    /* Add typename info, type of the tag and name of struct/union/etc. */
+    if ((type == TAG_TYPEDEF || type == TAG_VARIABLE || type == TAG_MEMBER)
+        && isContextualStatement(st))
+    {
+        char *p;
+
+        tag->extensionFields.typeRef [0] =
+            tagName (declToTagType (st->declaration));
+        p = vStringValue (st->blockName->name);
+
+        /* If there was no {} block get the name from the token before the
+         * name (current token is ';' or ',', previous token is the name).
+         */
+        if (p == NULL || *p == '\0')
+        {
+            tokenInfo *const prev2 = prevToken (st, 2);
+            if (isType (prev2, TOKEN_NAME)) /* when prev2 is not a name token, p keeps its empty or NULL value */
+                p = vStringValue (prev2->name);
+        }
+
+        /* Prepend the scope name if there is one. 
*/
+        if (vStringLength (scope) > 0)
+        {
+            vStringCopy(typeRef, scope);
+            addContextSeparator (typeRef);
+            vStringCatS(typeRef, p);
+            p = vStringValue (typeRef); /* from here on p points into typeRef, which is owned by the caller */
+        }
+        tag->extensionFields.typeRef [1] = p;
+    }
+}
+/*  Builds the qualified scope of a statement in string.
+ *
+ *  The result starts as the context name of the statement, if it has one.
+ *  The parent chain is then walked from the nearest parent outwards; every
+ *  parent that is a contextual statement, a namespace or a program has its
+ *  own context name (if any) and its block name put in front of what was
+ *  collected so far, joined with the separator of the current language.
+ */
+static void findScopeHierarchy (vString *const string,
+                                const statementInfo *const st)
+{
+    vStringClear (string);
+    if (isType (st->context, TOKEN_NAME))
+        vStringCopy (string, st->context->name);
+    if (st->parent != NULL)
+    {
+        vString *temp = vStringNew ();
+        const statementInfo *s;
+        for (s = st->parent ; s != NULL ; s = s->parent)
+        {
+            if (isContextualStatement (s) ||
+                s->declaration == DECL_NAMESPACE ||
+                s->declaration == DECL_PROGRAM)
+            {
+                vStringCopy (temp, string); /* set the inner scope aside so the outer names can be written first */
+                vStringClear (string);
+                Assert (isType (s->blockName, TOKEN_NAME));
+                if (isType (s->context, TOKEN_NAME) &&
+                    vStringLength (s->context->name) > 0)
+                {
+                    vStringCat (string, s->context->name);
+                    addContextSeparator (string);
+                }
+                vStringCat (string, s->blockName->name);
+                if (vStringLength (temp) > 0) /* a separator is needed only when an inner scope follows */
+                    addContextSeparator (string);
+                vStringCat (string, temp);
+            }
+        }
+        vStringDelete (temp);
+    }
+}
+/*  Emits an additional tag entry whose name is prefixed with its scope.
+ *
+ *  Nothing is done unless Option.include.qualifiedTags is set and scope is
+ *  a non-empty string. For enumerators the last component of the scope is
+ *  dropped before it is used as the prefix. The name of e is replaced by
+ *  the qualified name for the duration of the makeTagEntry () call.
+ */
+static void makeExtraTagEntry (const tagType type, tagEntryInfo *const e,
+                               vString *const scope)
+{
+    if (Option.include.qualifiedTags &&
+        scope != NULL && vStringLength (scope) > 0)
+    {
+        vString *const scopedName = vStringNew ();
+
+        if (type != TAG_ENUMERATOR)
+            vStringCopy (scopedName, scope);
+        else
+        {
+            /* remove last component (i.e. 
enumeration name) from scope */
+            const char* const sc = vStringValue (scope);
+            const char* colon = strrchr (sc, ':'); /* NOTE(review): only a colon is searched for; when the scope holds none, scopedName stays empty and no extra entry is made */
+            if (colon != NULL)
+            {
+                while (*colon == ':' && colon > sc) /* step back over the whole run of colons */
+                    --colon;
+                vStringNCopy (scopedName, scope, colon + 1 - sc);
+            }
+        }
+        if (vStringLength (scopedName) > 0)
+        {
+            addContextSeparator (scopedName);
+            vStringCatS (scopedName, e->name);
+            e->name = vStringValue (scopedName); /* NOTE(review): e->name now points into scopedName, which is deleted below, so the caller must not read e->name afterwards */
+            makeTagEntry (e);
+        }
+        vStringDelete (scopedName);
+    }
+}
+/*  Creates the tag entry, and where applicable the scope-qualified extra
+ *  entry, for a name token.
+ *
+ *  Nothing is emitted unless the token is a non-empty name token and
+ *  includeTag () accepts the type. isFileScope is forced to FALSE when
+ *  isHeaderFile () reports a header file.
+ */
+static void makeTag (const tokenInfo *const token,
+                     const statementInfo *const st,
+                     boolean isFileScope, const tagType type)
+{
+    /* Nothing is really of file scope when it appears in a header file.
+     */
+    isFileScope = (boolean) (isFileScope && ! isHeaderFile ());
+
+    if (isType (token, TOKEN_NAME) && vStringLength (token->name) > 0 &&
+        includeTag (type, isFileScope))
+    {
+        vString *scope = vStringNew ();
+        /* Use "typeRef" to store the typename from addOtherFields() until
+         * it's used in makeTagEntry().
+         */
+        vString *typeRef = vStringNew ();
+        tagEntryInfo e;
+
+        initTagEntry (&e, vStringValue (token->name));
+
+        e.lineNumber = token->lineNumber;
+        e.filePosition = token->filePosition;
+        e.isFileScope = isFileScope;
+        e.kindName = tagName (type);
+        e.kind = tagLetter (type);
+
+        findScopeHierarchy (scope, st);
+        addOtherFields (&e, type, st, scope, typeRef);
+
+        makeTagEntry (&e);
+        makeExtraTagEntry (type, &e, scope);
+        vStringDelete (scope); /* both strings are released only after the entries that refer to them were written */
+        vStringDelete (typeRef);
+    }
+}
+/*  Returns TRUE when the declaration is one of base, class, enum, event,
+ *  struct or union; FALSE for every other declaration.
+ */
+static boolean isValidTypeSpecifier (const declType declaration)
+{
+    boolean result;
+    switch (declaration)
+    {
+        case DECL_BASE:
+        case DECL_CLASS:
+        case DECL_ENUM:
+        case DECL_EVENT:
+        case DECL_STRUCT:
+        case DECL_UNION:
+            result = TRUE;
+            break;
+
+        default:
+            result = FALSE;
+            break;
+    }
+    return result;
+}
+/*  Emits an enumerator tag for a name token, always requested as file
+ *  scope; tokens of any other type are ignored.
+ */
+static void qualifyEnumeratorTag (const statementInfo *const st,
+                                  const tokenInfo *const nameToken)
+{
+    if (isType (nameToken, TOKEN_NAME))
+        makeTag (nameToken, st, TRUE, TAG_ENUMERATOR);
+}
+/*  Emits the tag for a function definition named by a name token: a method
+ *  tag for Java and C#, a task tag for a Vera task declaration, a function
+ *  tag otherwise. The tag is file scope when the member access is private,
+ *  or when the statement is not a member and has static scope.
+ */
+static 
void qualifyFunctionTag (const statementInfo *const st, + const tokenInfo *const nameToken) +{ + if (isType (nameToken, TOKEN_NAME)) + { + tagType type; + const boolean isFileScope = + (boolean) (st->member.access == ACCESS_PRIVATE || + (!isMember (st) && st->scope == SCOPE_STATIC)); + if (isLanguage (Lang_java) || isLanguage (Lang_csharp)) + type = TAG_METHOD; + else if (isLanguage (Lang_vera) && st->declaration == DECL_TASK) + type = TAG_TASK; + else + type = TAG_FUNCTION; + makeTag (nameToken, st, isFileScope, type); + } +} + +static void qualifyFunctionDeclTag (const statementInfo *const st, + const tokenInfo *const nameToken) +{ + if (! isType (nameToken, TOKEN_NAME)) + ; + else if (isLanguage (Lang_java) || isLanguage (Lang_csharp)) + qualifyFunctionTag (st, nameToken); + else if (st->scope == SCOPE_TYPEDEF) + makeTag (nameToken, st, TRUE, TAG_TYPEDEF); + else if (isValidTypeSpecifier (st->declaration) && ! isLanguage (Lang_csharp)) + makeTag (nameToken, st, TRUE, TAG_PROTOTYPE); +} + +static void qualifyCompoundTag (const statementInfo *const st, + const tokenInfo *const nameToken) +{ + if (isType (nameToken, TOKEN_NAME)) + { + const tagType type = declToTagType (st->declaration); + const boolean fileScoped = (boolean) + (!(isLanguage (Lang_java) || + isLanguage (Lang_csharp) || + isLanguage (Lang_vera))); + + if (type != TAG_UNDEFINED) + makeTag (nameToken, st, fileScoped, type); + } +} + +static void qualifyBlockTag (statementInfo *const st, + const tokenInfo *const nameToken) +{ + switch (st->declaration) + { + case DECL_CLASS: + case DECL_ENUM: + case DECL_INTERFACE: + case DECL_NAMESPACE: + case DECL_PROGRAM: + case DECL_STRUCT: + case DECL_UNION: + qualifyCompoundTag (st, nameToken); + break; + default: break; + } +} + +static void qualifyVariableTag (const statementInfo *const st, + const tokenInfo *const nameToken) +{ + /* We have to watch that we do not interpret a declaration of the + * form "struct tag;" as a variable definition. 
In such a case, the + * token preceding the name will be a keyword. + */ + if (! isType (nameToken, TOKEN_NAME)) + ; + else if (st->scope == SCOPE_TYPEDEF) + makeTag (nameToken, st, TRUE, TAG_TYPEDEF); + else if (st->declaration == DECL_EVENT) + makeTag (nameToken, st, (boolean) (st->member.access == ACCESS_PRIVATE), + TAG_EVENT); + else if (st->declaration == DECL_PACKAGE) + makeTag (nameToken, st, FALSE, TAG_PACKAGE); + else if (isValidTypeSpecifier (st->declaration)) + { + if (st->notVariable) + ; + else if (isMember (st)) + { + if (isLanguage (Lang_java) || isLanguage (Lang_csharp)) + makeTag (nameToken, st, + (boolean) (st->member.access == ACCESS_PRIVATE), TAG_FIELD); + else if (st->scope == SCOPE_GLOBAL || st->scope == SCOPE_STATIC) + makeTag (nameToken, st, TRUE, TAG_MEMBER); + } + else + { + if (st->scope == SCOPE_EXTERN || ! st->haveQualifyingName) + makeTag (nameToken, st, FALSE, TAG_EXTERN_VAR); + else if (st->inFunction) + makeTag (nameToken, st, (boolean) (st->scope == SCOPE_STATIC), + TAG_LOCAL); + else + makeTag (nameToken, st, (boolean) (st->scope == SCOPE_STATIC), + TAG_VARIABLE); + } + } +} + +/* +* Parsing functions +*/ + +static int skipToOneOf (const char *const chars) +{ + int c; + do + c = cppGetc (); + while (c != EOF && c != '\0' && strchr (chars, c) == NULL); + return c; +} + +/* Skip to the next non-white character. + */ +static int skipToNonWhite (void) +{ + boolean found = FALSE; + int c; + +#if 0 + do + c = cppGetc (); + while (isspace (c)); +#else + while (1) + { + c = cppGetc (); + if (isspace (c)) + found = TRUE; + else + break; + } + if (CollectingSignature && found) + vStringPut (Signature, ' '); +#endif + + return c; +} + +/* Skips to the next brace in column 1. This is intended for cases where + * preprocessor constructs result in unbalanced braces. 
+ */ +static void skipToFormattedBraceMatch (void) +{ + int c, next; + + c = cppGetc (); + next = cppGetc (); + while (c != EOF && (c != '\n' || next != '}')) + { + c = next; + next = cppGetc (); + } +} + +/* Skip to the matching character indicated by the pair string. If skipping + * to a matching brace and any brace is found within a different level of a + * #if conditional statement while brace formatting is in effect, we skip to + * the brace matched by its formatting. It is assumed that we have already + * read the character which starts the group (i.e. the first character of + * "pair"). + */ +static void skipToMatch (const char *const pair) +{ + const boolean braceMatching = (boolean) (strcmp ("{}", pair) == 0); + const boolean braceFormatting = (boolean) (isBraceFormat () && braceMatching); + const unsigned int initialLevel = getDirectiveNestLevel (); + const int begin = pair [0], end = pair [1]; + const unsigned long inputLineNumber = getInputLineNumber (); + int matchLevel = 1; + int c = '\0'; + + while (matchLevel > 0 && (c = skipToNonWhite ()) != EOF) + { + if (CollectingSignature) + vStringPut (Signature, c); + if (c == begin) + { + ++matchLevel; + if (braceFormatting && getDirectiveNestLevel () != initialLevel) + { + skipToFormattedBraceMatch (); + break; + } + } + else if (c == end) + { + --matchLevel; + if (braceFormatting && getDirectiveNestLevel () != initialLevel) + { + skipToFormattedBraceMatch (); + break; + } + } + } + if (c == EOF) + { + verbose ("%s: failed to find match for '%c' at line %lu\n", + getInputFileName (), begin, inputLineNumber); + if (braceMatching) + longjmp (Exception, (int) ExceptionBraceFormattingError); + else + longjmp (Exception, (int) ExceptionFormattingError); + } +} + +static void skipParens (void) +{ + const int c = skipToNonWhite (); + + if (c == '(') + skipToMatch ("()"); + else + cppUngetc (c); +} + +static void skipBraces (void) +{ + const int c = skipToNonWhite (); + + if (c == '{') + skipToMatch ("{}"); + else 
+ cppUngetc (c); +} + +static keywordId analyzeKeyword (const char *const name) +{ + const keywordId id = (keywordId) lookupKeyword (name, getSourceLanguage ()); + return id; +} + +static void analyzeIdentifier (tokenInfo *const token) +{ + char *const name = vStringValue (token->name); + const char *replacement = NULL; + boolean parensToo = FALSE; + + if (isLanguage (Lang_java) || + ! isIgnoreToken (name, &parensToo, &replacement)) + { + if (replacement != NULL) + token->keyword = analyzeKeyword (replacement); + else + token->keyword = analyzeKeyword (vStringValue (token->name)); + + if (token->keyword == KEYWORD_NONE) + token->type = TOKEN_NAME; + else + token->type = TOKEN_KEYWORD; + } + else + { + initToken (token); + if (parensToo) + { + int c = skipToNonWhite (); + + if (c == '(') + skipToMatch ("()"); + } + } +} + +static void readIdentifier (tokenInfo *const token, const int firstChar) +{ + vString *const name = token->name; + int c = firstChar; + boolean first = TRUE; + + initToken (token); + + /* Bug #1585745: strangely, C++ destructors allow whitespace between + * the ~ and the class name. 
*/ + if (isLanguage (Lang_cpp) && firstChar == '~') + { + vStringPut (name, c); + c = skipToNonWhite (); + } + + do + { + vStringPut (name, c); + if (CollectingSignature) + { + if (!first) + vStringPut (Signature, c); + first = FALSE; + } + c = cppGetc (); + } while (isident (c) || ((isLanguage (Lang_java) || isLanguage (Lang_csharp)) && (isHighChar (c) || c == '.'))); + vStringTerminate (name); + cppUngetc (c); /* unget non-identifier character */ + + analyzeIdentifier (token); +} + +static void readPackageName (tokenInfo *const token, const int firstChar) +{ + vString *const name = token->name; + int c = firstChar; + + initToken (token); + + while (isident (c) || c == '.') + { + vStringPut (name, c); + c = cppGetc (); + } + vStringTerminate (name); + cppUngetc (c); /* unget non-package character */ +} + +static void readPackageOrNamespace (statementInfo *const st, const declType declaration) +{ + st->declaration = declaration; + + if (declaration == DECL_NAMESPACE && !isLanguage (Lang_csharp)) + { + /* In C++ a namespace is specified one level at a time. */ + return; + } + else + { + /* In C#, a namespace can also be specified like a Java package name. */ + tokenInfo *const token = activeToken (st); + Assert (isType (token, TOKEN_KEYWORD)); + readPackageName (token, skipToNonWhite ()); + token->type = TOKEN_NAME; + st->gotName = TRUE; + st->haveQualifyingName = TRUE; + } +} + +static void processName (statementInfo *const st) +{ + Assert (isType (activeToken (st), TOKEN_NAME)); + if (st->gotName && st->declaration == DECL_NONE) + st->declaration = DECL_BASE; + st->gotName = TRUE; + st->haveQualifyingName = TRUE; +} + +static void readOperator (statementInfo *const st) +{ + const char *const acceptable = "+-*/%^&|~!=<>,[]"; + const tokenInfo* const prev = prevToken (st,1); + tokenInfo *const token = activeToken (st); + vString *const name = token->name; + int c = skipToNonWhite (); + + /* When we arrive here, we have the keyword "operator" in 'name'. 
+ */ + if (isType (prev, TOKEN_KEYWORD) && (prev->keyword == KEYWORD_ENUM || + prev->keyword == KEYWORD_STRUCT || prev->keyword == KEYWORD_UNION)) + ; /* ignore "operator" keyword if preceded by these keywords */ + else if (c == '(') + { + /* Verify whether this is a valid function call (i.e. "()") operator. + */ + if (cppGetc () == ')') + { + vStringPut (name, ' '); /* always separate operator from keyword */ + c = skipToNonWhite (); + if (c == '(') + vStringCatS (name, "()"); + } + else + { + skipToMatch ("()"); + c = cppGetc (); + } + } + else if (isident1 (c)) + { + /* Handle "new" and "delete" operators, and conversion functions + * (per 13.3.1.1.2 [2] of the C++ spec). + */ + boolean whiteSpace = TRUE; /* default causes insertion of space */ + do + { + if (isspace (c)) + whiteSpace = TRUE; + else + { + if (whiteSpace) + { + vStringPut (name, ' '); + whiteSpace = FALSE; + } + vStringPut (name, c); + } + c = cppGetc (); + } while (! isOneOf (c, "(;") && c != EOF); + vStringTerminate (name); + } + else if (isOneOf (c, acceptable)) + { + vStringPut (name, ' '); /* always separate operator from keyword */ + do + { + vStringPut (name, c); + c = cppGetc (); + } while (isOneOf (c, acceptable)); + vStringTerminate (name); + } + + cppUngetc (c); + + token->type = TOKEN_NAME; + token->keyword = KEYWORD_NONE; + processName (st); +} + +static void copyToken (tokenInfo *const dest, const tokenInfo *const src) +{ + dest->type = src->type; + dest->keyword = src->keyword; + dest->filePosition = src->filePosition; + dest->lineNumber = src->lineNumber; + vStringCopy (dest->name, src->name); +} + +static void setAccess (statementInfo *const st, const accessType access) +{ + if (isMember (st)) + { + if (isLanguage (Lang_cpp)) + { + int c = skipToNonWhite (); + + if (c == ':') + reinitStatement (st, FALSE); + else + cppUngetc (c); + + st->member.accessDefault = access; + } + st->member.access = access; + } +} + +static void discardTypeList (tokenInfo *const token) +{ + int c = 
skipToNonWhite (); + while (isident1 (c)) + { + readIdentifier (token, c); + c = skipToNonWhite (); + if (c == '.' || c == ',') + c = skipToNonWhite (); + } + cppUngetc (c); +} + +static void addParentClass (statementInfo *const st, tokenInfo *const token) +{ + if (vStringLength (token->name) > 0 && + vStringLength (st->parentClasses) > 0) + { + vStringPut (st->parentClasses, ','); + } + vStringCat (st->parentClasses, token->name); +} + +static void readParents (statementInfo *const st, const int qualifier) +{ + tokenInfo *const token = newToken (); + tokenInfo *const parent = newToken (); + int c; + + do + { + c = skipToNonWhite (); + if (isident1 (c)) + { + readIdentifier (token, c); + if (isType (token, TOKEN_NAME)) + vStringCat (parent->name, token->name); + else + { + addParentClass (st, parent); + initToken (parent); + } + } + else if (c == qualifier) + vStringPut (parent->name, c); + else if (c == '<') + skipToMatch ("<>"); + else if (isType (token, TOKEN_NAME)) + { + addParentClass (st, parent); + initToken (parent); + } + } while (c != '{' && c != EOF); + cppUngetc (c); + deleteToken (parent); + deleteToken (token); +} + +static void skipStatement (statementInfo *const st) +{ + st->declaration = DECL_IGNORE; + skipToOneOf (";"); +} + +static void processInterface (statementInfo *const st) +{ + st->declaration = DECL_INTERFACE; +} + +static void processToken (tokenInfo *const token, statementInfo *const st) +{ + switch (token->keyword) /* is it a reserved word? 
*/ + { + default: break; + + case KEYWORD_NONE: processName (st); break; + case KEYWORD_ABSTRACT: st->implementation = IMP_ABSTRACT; break; + case KEYWORD_ATTRIBUTE: skipParens (); initToken (token); break; + case KEYWORD_BIND: st->declaration = DECL_BASE; break; + case KEYWORD_BIT: st->declaration = DECL_BASE; break; + case KEYWORD_CATCH: skipParens (); skipBraces (); break; + case KEYWORD_CHAR: st->declaration = DECL_BASE; break; + case KEYWORD_CLASS: st->declaration = DECL_CLASS; break; + case KEYWORD_CONST: st->declaration = DECL_BASE; break; + case KEYWORD_DOUBLE: st->declaration = DECL_BASE; break; + case KEYWORD_ENUM: st->declaration = DECL_ENUM; break; + case KEYWORD_EXTENDS: readParents (st, '.'); + setToken (st, TOKEN_NONE); break; + case KEYWORD_FLOAT: st->declaration = DECL_BASE; break; + case KEYWORD_FUNCTION: st->declaration = DECL_BASE; break; + case KEYWORD_FRIEND: st->scope = SCOPE_FRIEND; break; + case KEYWORD_GOTO: skipStatement (st); break; + case KEYWORD_IMPLEMENTS:readParents (st, '.'); + setToken (st, TOKEN_NONE); break; + case KEYWORD_IMPORT: skipStatement (st); break; + case KEYWORD_INT: st->declaration = DECL_BASE; break; + case KEYWORD_INTEGER: st->declaration = DECL_BASE; break; + case KEYWORD_INTERFACE: processInterface (st); break; + case KEYWORD_LOCAL: setAccess (st, ACCESS_LOCAL); break; + case KEYWORD_LONG: st->declaration = DECL_BASE; break; + case KEYWORD_OPERATOR: readOperator (st); break; + case KEYWORD_PRIVATE: setAccess (st, ACCESS_PRIVATE); break; + case KEYWORD_PROGRAM: st->declaration = DECL_PROGRAM; break; + case KEYWORD_PROTECTED: setAccess (st, ACCESS_PROTECTED); break; + case KEYWORD_PUBLIC: setAccess (st, ACCESS_PUBLIC); break; + case KEYWORD_RETURN: skipStatement (st); break; + case KEYWORD_SHORT: st->declaration = DECL_BASE; break; + case KEYWORD_SIGNED: st->declaration = DECL_BASE; break; + case KEYWORD_STRING: st->declaration = DECL_BASE; break; + case KEYWORD_STRUCT: st->declaration = DECL_STRUCT; break; + case 
KEYWORD_TASK: st->declaration = DECL_TASK; break; + case KEYWORD_THROWS: discardTypeList (token); break; + case KEYWORD_UNION: st->declaration = DECL_UNION; break; + case KEYWORD_UNSIGNED: st->declaration = DECL_BASE; break; + case KEYWORD_USING: skipStatement (st); break; + case KEYWORD_VOID: st->declaration = DECL_BASE; break; + case KEYWORD_VOLATILE: st->declaration = DECL_BASE; break; + case KEYWORD_VIRTUAL: st->implementation = IMP_VIRTUAL; break; + case KEYWORD_WCHAR_T: st->declaration = DECL_BASE; break; + + case KEYWORD_NAMESPACE: readPackageOrNamespace (st, DECL_NAMESPACE); break; + case KEYWORD_PACKAGE: readPackageOrNamespace (st, DECL_PACKAGE); break; + + case KEYWORD_EVENT: + if (isLanguage (Lang_csharp)) + st->declaration = DECL_EVENT; + break; + + case KEYWORD_TYPEDEF: + reinitStatement (st, FALSE); + st->scope = SCOPE_TYPEDEF; + break; + + case KEYWORD_EXTERN: + if (! isLanguage (Lang_csharp) || !st->gotName) + { + reinitStatement (st, FALSE); + st->scope = SCOPE_EXTERN; + st->declaration = DECL_BASE; + } + break; + + case KEYWORD_STATIC: + if (! 
(isLanguage (Lang_java) || isLanguage (Lang_csharp))) + { + reinitStatement (st, FALSE); + st->scope = SCOPE_STATIC; + st->declaration = DECL_BASE; + } + break; + + case KEYWORD_FOR: + case KEYWORD_FOREACH: + case KEYWORD_IF: + case KEYWORD_SWITCH: + case KEYWORD_WHILE: + { + int c = skipToNonWhite (); + if (c == '(') + skipToMatch ("()"); + break; + } + } +} + +/* +* Parenthesis handling functions +*/ + +static void restartStatement (statementInfo *const st) +{ + tokenInfo *const save = newToken (); + tokenInfo *token = activeToken (st); + + copyToken (save, token); + DebugStatement ( if (debug (DEBUG_PARSE)) printf ("");) + reinitStatement (st, FALSE); + token = activeToken (st); + copyToken (token, save); + deleteToken (save); + processToken (token, st); +} + +/* Skips over a the mem-initializer-list of a ctor-initializer, defined as: + * + * mem-initializer-list: + * mem-initializer, mem-initializer-list + * + * mem-initializer: + * [::] [nested-name-spec] class-name (...) + * identifier + */ +static void skipMemIntializerList (tokenInfo *const token) +{ + int c; + + do + { + c = skipToNonWhite (); + while (isident1 (c) || c == ':') + { + if (c != ':') + readIdentifier (token, c); + c = skipToNonWhite (); + } + if (c == '<') + { + skipToMatch ("<>"); + c = skipToNonWhite (); + } + if (c == '(') + { + skipToMatch ("()"); + c = skipToNonWhite (); + } + } while (c == ','); + cppUngetc (c); +} + +static void skipMacro (statementInfo *const st) +{ + tokenInfo *const prev2 = prevToken (st, 2); + + if (isType (prev2, TOKEN_NAME)) + retardToken (st); + skipToMatch ("()"); +} + +/* Skips over characters following the parameter list. This will be either + * non-ANSI style function declarations or C++ stuff. Our choices: + * + * C (K&R): + * int func (); + * int func (one, two) int one; float two; {...} + * C (ANSI): + * int func (int one, float two); + * int func (int one, float two) {...} + * C++: + * int foo (...) [const|volatile] [throw (...)]; + * int foo (...) 
[const|volatile] [throw (...)] [ctor-initializer] {...} + * int foo (...) [const|volatile] [throw (...)] try [ctor-initializer] {...} + * catch (...) {...} + */ +static boolean skipPostArgumentStuff ( + statementInfo *const st, parenInfo *const info) +{ + tokenInfo *const token = activeToken (st); + unsigned int parameters = info->parameterCount; + unsigned int elementCount = 0; + boolean restart = FALSE; + boolean end = FALSE; + int c = skipToNonWhite (); + + do + { + switch (c) + { + case ')': break; + case ':': skipMemIntializerList (token);break; /* ctor-initializer */ + case '[': skipToMatch ("[]"); break; + case '=': cppUngetc (c); end = TRUE; break; + case '{': cppUngetc (c); end = TRUE; break; + case '}': cppUngetc (c); end = TRUE; break; + + case '(': + if (elementCount > 0) + ++elementCount; + skipToMatch ("()"); + break; + + case ';': + if (parameters == 0 || elementCount < 2) + { + cppUngetc (c); + end = TRUE; + } + else if (--parameters == 0) + end = TRUE; + break; + + default: + if (isident1 (c)) + { + readIdentifier (token, c); + switch (token->keyword) + { + case KEYWORD_ATTRIBUTE: skipParens (); break; + case KEYWORD_THROW: skipParens (); break; + case KEYWORD_TRY: break; + + case KEYWORD_CONST: + case KEYWORD_VOLATILE: + if (vStringLength (Signature) > 0) + { + vStringPut (Signature, ' '); + vStringCat (Signature, token->name); + } + break; + + case KEYWORD_CATCH: + case KEYWORD_CLASS: + case KEYWORD_EXPLICIT: + case KEYWORD_EXTERN: + case KEYWORD_FRIEND: + case KEYWORD_INLINE: + case KEYWORD_MUTABLE: + case KEYWORD_NAMESPACE: + case KEYWORD_NEW: + case KEYWORD_NEWCOV: + case KEYWORD_OPERATOR: + case KEYWORD_OVERLOAD: + case KEYWORD_PRIVATE: + case KEYWORD_PROTECTED: + case KEYWORD_PUBLIC: + case KEYWORD_STATIC: + case KEYWORD_TEMPLATE: + case KEYWORD_TYPEDEF: + case KEYWORD_TYPENAME: + case KEYWORD_USING: + case KEYWORD_VIRTUAL: + /* Never allowed within parameter declarations. 
*/ + restart = TRUE; + end = TRUE; + break; + + default: + if (isType (token, TOKEN_NONE)) + ; + else if (info->isKnrParamList && info->parameterCount > 0) + ++elementCount; + else + { + /* If we encounter any other identifier immediately + * following an empty parameter list, this is almost + * certainly one of those Microsoft macro "thingies" + * that the automatic source code generation sticks + * in. Terminate the current statement. + */ + restart = TRUE; + end = TRUE; + } + break; + } + } + } + if (! end) + { + c = skipToNonWhite (); + if (c == EOF) + end = TRUE; + } + } while (! end); + + if (restart) + restartStatement (st); + else + setToken (st, TOKEN_NONE); + + return (boolean) (c != EOF); +} + +static void skipJavaThrows (statementInfo *const st) +{ + tokenInfo *const token = activeToken (st); + int c = skipToNonWhite (); + + if (isident1 (c)) + { + readIdentifier (token, c); + if (token->keyword == KEYWORD_THROWS) + { + do + { + c = skipToNonWhite (); + if (isident1 (c)) + { + readIdentifier (token, c); + c = skipToNonWhite (); + } + } while (c == '.' || c == ','); + } + } + cppUngetc (c); + setToken (st, TOKEN_NONE); +} + +static void analyzePostParens (statementInfo *const st, parenInfo *const info) +{ + const unsigned long inputLineNumber = getInputLineNumber (); + int c = skipToNonWhite (); + + cppUngetc (c); + if (isOneOf (c, "{;,=")) + ; + else if (isLanguage (Lang_java)) + skipJavaThrows (st); + else + { + if (! 
skipPostArgumentStuff (st, info)) + { + verbose ( + "%s: confusing argument declarations beginning at line %lu\n", + getInputFileName (), inputLineNumber); + longjmp (Exception, (int) ExceptionFormattingError); + } + } +} + +static boolean languageSupportsGenerics (void) +{ + return (boolean) (isLanguage (Lang_cpp) || isLanguage (Lang_csharp) || + isLanguage (Lang_java)); +} + +static void processAngleBracket (void) +{ + int c = cppGetc (); + if (c == '>') { + /* already found match for template */ + } else if (languageSupportsGenerics () && c != '<' && c != '=') { + /* this is a template */ + cppUngetc (c); + skipToMatch ("<>"); + } else if (c == '<') { + /* skip "<<" or "<<=". */ + c = cppGetc (); + if (c != '=') { + cppUngetc (c); + } + } else { + cppUngetc (c); + } +} + +static void parseJavaAnnotation (statementInfo *const st) +{ + /* + * @Override + * @Target(ElementType.METHOD) + * @SuppressWarnings(value = "unchecked") + * + * But watch out for "@interface"! + */ + tokenInfo *const token = activeToken (st); + + int c = skipToNonWhite (); + readIdentifier (token, c); + if (token->keyword == KEYWORD_INTERFACE) + { + /* Oops. This was actually "@interface" defining a new annotation. */ + processInterface (st); + } + else + { + /* Bug #1691412: skip any annotation arguments. 
*/ + skipParens (); + } +} + +static int parseParens (statementInfo *const st, parenInfo *const info) +{ + tokenInfo *const token = activeToken (st); + unsigned int identifierCount = 0; + unsigned int depth = 1; + boolean firstChar = TRUE; + int nextChar = '\0'; + + CollectingSignature = TRUE; + vStringClear (Signature); + vStringPut (Signature, '('); + info->parameterCount = 1; + do + { + int c = skipToNonWhite (); + vStringPut (Signature, c); + + switch (c) + { + case '&': + case '*': + info->isPointer = TRUE; + info->isKnrParamList = FALSE; + if (identifierCount == 0) + info->isParamList = FALSE; + initToken (token); + break; + + case ':': + info->isKnrParamList = FALSE; + break; + + case '.': + info->isNameCandidate = FALSE; + c = cppGetc (); + if (c != '.') + { + cppUngetc (c); + info->isKnrParamList = FALSE; + } + else + { + c = cppGetc (); + if (c != '.') + { + cppUngetc (c); + info->isKnrParamList = FALSE; + } + else + vStringCatS (Signature, "..."); /* variable arg list */ + } + break; + + case ',': + info->isNameCandidate = FALSE; + if (info->isKnrParamList) + { + ++info->parameterCount; + identifierCount = 0; + } + break; + + case '=': + info->isKnrParamList = FALSE; + info->isNameCandidate = FALSE; + if (firstChar) + { + info->isParamList = FALSE; + skipMacro (st); + depth = 0; + } + break; + + case '[': + info->isKnrParamList = FALSE; + skipToMatch ("[]"); + break; + + case '<': + info->isKnrParamList = FALSE; + processAngleBracket (); + break; + + case ')': + if (firstChar) + info->parameterCount = 0; + --depth; + break; + + case '(': + info->isKnrParamList = FALSE; + if (firstChar) + { + info->isNameCandidate = FALSE; + cppUngetc (c); + vStringClear (Signature); + skipMacro (st); + depth = 0; + vStringChop (Signature); + } + else if (isType (token, TOKEN_PAREN_NAME)) + { + c = skipToNonWhite (); + if (c == '*') /* check for function pointer */ + { + skipToMatch ("()"); + c = skipToNonWhite (); + if (c == '(') + skipToMatch ("()"); + else + cppUngetc 
(c); + } + else + { + cppUngetc (c); + cppUngetc ('('); + info->nestedArgs = TRUE; + } + } + else + ++depth; + break; + + default: + if (c == '@' && isLanguage (Lang_java)) + { + parseJavaAnnotation(st); + } + else if (isident1 (c)) + { + if (++identifierCount > 1) + info->isKnrParamList = FALSE; + readIdentifier (token, c); + if (isType (token, TOKEN_NAME) && info->isNameCandidate) + token->type = TOKEN_PAREN_NAME; + else if (isType (token, TOKEN_KEYWORD)) + { + if (token->keyword != KEYWORD_CONST && + token->keyword != KEYWORD_VOLATILE) + { + info->isKnrParamList = FALSE; + info->isNameCandidate = FALSE; + } + } + } + else + { + info->isParamList = FALSE; + info->isKnrParamList = FALSE; + info->isNameCandidate = FALSE; + info->invalidContents = TRUE; + } + break; + } + firstChar = FALSE; + } while (! info->nestedArgs && depth > 0 && + (info->isKnrParamList || info->isNameCandidate)); + + if (! info->nestedArgs) while (depth > 0) + { + skipToMatch ("()"); + --depth; + } + + if (! info->isNameCandidate) + initToken (token); + + vStringTerminate (Signature); + if (info->isKnrParamList) + vStringClear (Signature); + CollectingSignature = FALSE; + return nextChar; +} + +static void initParenInfo (parenInfo *const info) +{ + info->isPointer = FALSE; + info->isParamList = TRUE; + info->isKnrParamList = isLanguage (Lang_c); + info->isNameCandidate = TRUE; + info->invalidContents = FALSE; + info->nestedArgs = FALSE; + info->parameterCount = 0; +} + +static void analyzeParens (statementInfo *const st) +{ + tokenInfo *const prev = prevToken (st, 1); + + if (st->inFunction && ! st->assignment) + st->notVariable = TRUE; + if (! 
isType (prev, TOKEN_NONE)) /* in case of ignored enclosing macros */ + { + tokenInfo *const token = activeToken (st); + parenInfo info; + int c; + + initParenInfo (&info); + parseParens (st, &info); + c = skipToNonWhite (); + cppUngetc (c); + if (info.invalidContents) + reinitStatement (st, FALSE); + else if (info.isNameCandidate && isType (token, TOKEN_PAREN_NAME) && + ! st->gotParenName && + (! info.isParamList || ! st->haveQualifyingName || + c == '(' || + (c == '=' && st->implementation != IMP_VIRTUAL) || + (st->declaration == DECL_NONE && isOneOf (c, ",;")))) + { + token->type = TOKEN_NAME; + processName (st); + st->gotParenName = TRUE; + if (! (c == '(' && info.nestedArgs)) + st->isPointer = info.isPointer; + } + else if (! st->gotArgs && info.isParamList) + { + st->gotArgs = TRUE; + setToken (st, TOKEN_ARGS); + advanceToken (st); + if (st->scope != SCOPE_TYPEDEF) + analyzePostParens (st, &info); + } + else + setToken (st, TOKEN_NONE); + } +} + +/* +* Token parsing functions +*/ + +static void addContext (statementInfo *const st, const tokenInfo* const token) +{ + if (isType (token, TOKEN_NAME)) + { + if (vStringLength (st->context->name) > 0) + { + if (isLanguage (Lang_c) || isLanguage (Lang_cpp)) + vStringCatS (st->context->name, "::"); + else if (isLanguage (Lang_java) || isLanguage (Lang_csharp)) + vStringCatS (st->context->name, "."); + } + vStringCat (st->context->name, token->name); + st->context->type = TOKEN_NAME; + } +} + +static boolean inheritingDeclaration (declType decl) +{ + /* C# supports inheritance for enums. C++0x will too, but not yet. */ + if (decl == DECL_ENUM) + { + return (boolean) (isLanguage (Lang_csharp)); + } + return (boolean) ( + decl == DECL_CLASS || + decl == DECL_STRUCT || + decl == DECL_INTERFACE); +} + +static void processColon (statementInfo *const st) +{ + int c = (isLanguage (Lang_cpp) ? 
cppGetc () : skipToNonWhite ()); + const boolean doubleColon = (boolean) (c == ':'); + + if (doubleColon) + { + setToken (st, TOKEN_DOUBLE_COLON); + st->haveQualifyingName = FALSE; + } + else + { + cppUngetc (c); + if ((isLanguage (Lang_cpp) || isLanguage (Lang_csharp)) && + inheritingDeclaration (st->declaration)) + { + readParents (st, ':'); + } + else if (parentDecl (st) == DECL_STRUCT) + { + c = skipToOneOf (",;"); + if (c == ',') + setToken (st, TOKEN_COMMA); + else if (c == ';') + setToken (st, TOKEN_SEMICOLON); + } + else + { + const tokenInfo *const prev = prevToken (st, 1); + const tokenInfo *const prev2 = prevToken (st, 2); + if (prev->keyword == KEYWORD_DEFAULT || + prev2->keyword == KEYWORD_CASE || + st->parent != NULL) + { + reinitStatement (st, FALSE); + } + } + } +} + +/* Skips over any initializing value which may follow an '=' character in a + * variable definition. + */ +static int skipInitializer (statementInfo *const st) +{ + boolean done = FALSE; + int c; + + while (! done) + { + c = skipToNonWhite (); + + if (c == EOF) + longjmp (Exception, (int) ExceptionFormattingError); + else switch (c) + { + case ',': + case ';': done = TRUE; break; + + case '0': + if (st->implementation == IMP_VIRTUAL) + st->implementation = IMP_PURE_VIRTUAL; + break; + + case '[': skipToMatch ("[]"); break; + case '(': skipToMatch ("()"); break; + case '{': skipToMatch ("{}"); break; + case '<': processAngleBracket(); break; + + case '}': + if (insideEnumBody (st)) + done = TRUE; + else if (! 
isBraceFormat ()) + { + verbose ("%s: unexpected closing brace at line %lu\n", + getInputFileName (), getInputLineNumber ()); + longjmp (Exception, (int) ExceptionBraceFormattingError); + } + break; + + default: break; + } + } + return c; +} + +static void processInitializer (statementInfo *const st) +{ + const boolean inEnumBody = insideEnumBody (st); + int c = cppGetc (); + + if (c != '=') + { + cppUngetc (c); + c = skipInitializer (st); + st->assignment = TRUE; + if (c == ';') + setToken (st, TOKEN_SEMICOLON); + else if (c == ',') + setToken (st, TOKEN_COMMA); + else if (c == '}' && inEnumBody) + { + cppUngetc (c); + setToken (st, TOKEN_COMMA); + } + if (st->scope == SCOPE_EXTERN) + st->scope = SCOPE_GLOBAL; + } +} + +static void parseIdentifier (statementInfo *const st, const int c) +{ + tokenInfo *const token = activeToken (st); + + readIdentifier (token, c); + if (! isType (token, TOKEN_NONE)) + processToken (token, st); +} + +static void parseGeneralToken (statementInfo *const st, const int c) +{ + const tokenInfo *const prev = prevToken (st, 1); + + if (isident1 (c) || (isLanguage (Lang_java) && isHighChar (c))) + { + parseIdentifier (st, c); + if (isType (st->context, TOKEN_NAME) && + isType (activeToken (st), TOKEN_NAME) && isType (prev, TOKEN_NAME)) + { + initToken (st->context); + } + } + else if (c == '.' || c == '-') + { + if (! st->assignment) + st->notVariable = TRUE; + if (c == '-') + { + int c2 = cppGetc (); + if (c2 != '>') + cppUngetc (c2); + } + } + else if (c == '!' || c == '>') + { + int c2 = cppGetc (); + if (c2 != '=') + cppUngetc (c2); + } + else if (c == '@' && isLanguage (Lang_java)) + { + parseJavaAnnotation (st); + } + else if (isExternCDecl (st, c)) + { + st->declaration = DECL_NOMANGLE; + st->scope = SCOPE_GLOBAL; + } +} + +/* Reads characters from the pre-processor and assembles tokens, setting + * the current statement state. 
+ */ +static void nextToken (statementInfo *const st) +{ + tokenInfo *token; + do + { + int c = skipToNonWhite (); + switch (c) + { + case EOF: longjmp (Exception, (int) ExceptionEOF); break; + case '(': analyzeParens (st); break; + case '<': processAngleBracket (); break; + case '*': st->haveQualifyingName = FALSE; break; + case ',': setToken (st, TOKEN_COMMA); break; + case ':': processColon (st); break; + case ';': setToken (st, TOKEN_SEMICOLON); break; + case '=': processInitializer (st); break; + case '[': skipToMatch ("[]"); break; + case '{': setToken (st, TOKEN_BRACE_OPEN); break; + case '}': setToken (st, TOKEN_BRACE_CLOSE); break; + default: parseGeneralToken (st, c); break; + } + token = activeToken (st); + } while (isType (token, TOKEN_NONE)); +} + +/* +* Scanning support functions +*/ + +static statementInfo *CurrentStatement = NULL; + +static statementInfo *newStatement (statementInfo *const parent) +{ + statementInfo *const st = xMalloc (1, statementInfo); + unsigned int i; + + for (i = 0 ; i < (unsigned int) NumTokens ; ++i) + st->token [i] = newToken (); + + st->context = newToken (); + st->blockName = newToken (); + st->parentClasses = vStringNew (); + + initStatement (st, parent); + CurrentStatement = st; + + return st; +} + +static void deleteStatement (void) +{ + statementInfo *const st = CurrentStatement; + statementInfo *const parent = st->parent; + unsigned int i; + + for (i = 0 ; i < (unsigned int) NumTokens ; ++i) + { + deleteToken (st->token [i]); st->token [i] = NULL; + } + deleteToken (st->blockName); st->blockName = NULL; + deleteToken (st->context); st->context = NULL; + vStringDelete (st->parentClasses); st->parentClasses = NULL; + eFree (st); + CurrentStatement = parent; +} + +static void deleteAllStatements (void) +{ + while (CurrentStatement != NULL) + deleteStatement (); +} + +static boolean isStatementEnd (const statementInfo *const st) +{ + const tokenInfo *const token = activeToken (st); + boolean isEnd; + + if (isType (token, 
TOKEN_SEMICOLON)) + isEnd = TRUE; + else if (isType (token, TOKEN_BRACE_CLOSE)) + /* Java and C# do not require semicolons to end a block. Neither do C++ + * namespaces. All other blocks require a semicolon to terminate them. + */ + isEnd = (boolean) (isLanguage (Lang_java) || isLanguage (Lang_csharp) || + ! isContextualStatement (st)); + else + isEnd = FALSE; + + return isEnd; +} + +static void checkStatementEnd (statementInfo *const st) +{ + const tokenInfo *const token = activeToken (st); + + if (isType (token, TOKEN_COMMA)) + reinitStatement (st, TRUE); + else if (isStatementEnd (st)) + { + DebugStatement ( if (debug (DEBUG_PARSE)) printf (""); ) + reinitStatement (st, FALSE); + cppEndStatement (); + } + else + { + cppBeginStatement (); + advanceToken (st); + } +} + +static void nest (statementInfo *const st, const unsigned int nestLevel) +{ + switch (st->declaration) + { + case DECL_CLASS: + case DECL_ENUM: + case DECL_INTERFACE: + case DECL_NAMESPACE: + case DECL_NOMANGLE: + case DECL_STRUCT: + case DECL_UNION: + createTags (nestLevel, st); + break; + + case DECL_FUNCTION: + case DECL_TASK: + st->inFunction = TRUE; + /* fall through */ + default: + if (includeTag (TAG_LOCAL, FALSE)) + createTags (nestLevel, st); + else + skipToMatch ("{}"); + break; + } + advanceToken (st); + setToken (st, TOKEN_BRACE_CLOSE); +} + +static void tagCheck (statementInfo *const st) +{ + const tokenInfo *const token = activeToken (st); + const tokenInfo *const prev = prevToken (st, 1); + const tokenInfo *const prev2 = prevToken (st, 2); + + switch (token->type) + { + case TOKEN_NAME: + if (insideEnumBody (st)) + qualifyEnumeratorTag (st, token); + break; +#if 0 + case TOKEN_PACKAGE: + if (st->haveQualifyingName) + makeTag (token, st, FALSE, TAG_PACKAGE); + break; +#endif + case TOKEN_BRACE_OPEN: + if (isType (prev, TOKEN_ARGS)) + { + if (st->haveQualifyingName) + { + if (! 
isLanguage (Lang_vera)) + st->declaration = DECL_FUNCTION; + if (isType (prev2, TOKEN_NAME)) + copyToken (st->blockName, prev2); + qualifyFunctionTag (st, prev2); + } + } + else if (isContextualStatement (st) || + st->declaration == DECL_NAMESPACE || + st->declaration == DECL_PROGRAM) + { + if (isType (prev, TOKEN_NAME)) + copyToken (st->blockName, prev); + else + { + /* For an anonymous struct or union we use a unique ID + * a number, so that the members can be found. + */ + char buf [20]; /* length of "_anon" + digits + null */ + sprintf (buf, "__anon%d", ++AnonymousID); + vStringCopyS (st->blockName->name, buf); + st->blockName->type = TOKEN_NAME; + st->blockName->keyword = KEYWORD_NONE; + } + qualifyBlockTag (st, prev); + } + else if (isLanguage (Lang_csharp)) + makeTag (prev, st, FALSE, TAG_PROPERTY); + break; + + case TOKEN_SEMICOLON: + case TOKEN_COMMA: + if (insideEnumBody (st)) + ; + else if (isType (prev, TOKEN_NAME)) + { + if (isContextualKeyword (prev2)) + makeTag (prev, st, TRUE, TAG_EXTERN_VAR); + else + qualifyVariableTag (st, prev); + } + else if (isType (prev, TOKEN_ARGS) && isType (prev2, TOKEN_NAME)) + { + if (st->isPointer) + qualifyVariableTag (st, prev2); + else + qualifyFunctionDeclTag (st, prev2); + } + if (isLanguage (Lang_java) && token->type == TOKEN_SEMICOLON && insideEnumBody (st)) + { + /* In Java, after an initial enum-like part, + * a semicolon introduces a class-like part. + * See Bug #1730485 for the full rationale. */ + st->parent->declaration = DECL_CLASS; + } + break; + + default: break; + } +} + +/* Parses the current file and decides whether to write out and tags that + * are discovered. 
+ */ +static void createTags (const unsigned int nestLevel, + statementInfo *const parent) +{ + statementInfo *const st = newStatement (parent); + + DebugStatement ( if (nestLevel > 0) debugParseNest (TRUE, nestLevel); ) + while (TRUE) + { + tokenInfo *token; + + nextToken (st); + token = activeToken (st); + if (isType (token, TOKEN_BRACE_CLOSE)) + { + if (nestLevel > 0) + break; + else + { + verbose ("%s: unexpected closing brace at line %lu\n", + getInputFileName (), getInputLineNumber ()); + longjmp (Exception, (int) ExceptionBraceFormattingError); + } + } + else if (isType (token, TOKEN_DOUBLE_COLON)) + { + addContext (st, prevToken (st, 1)); + advanceToken (st); + } + else + { + tagCheck (st); + if (isType (token, TOKEN_BRACE_OPEN)) + nest (st, nestLevel + 1); + checkStatementEnd (st); + } + } + deleteStatement (); + DebugStatement ( if (nestLevel > 0) debugParseNest (FALSE, nestLevel - 1); ) +} + +static boolean findCTags (const unsigned int passCount) +{ + exception_t exception; + boolean retry; + + Assert (passCount < 3); + cppInit ((boolean) (passCount > 1), isLanguage (Lang_csharp)); + Signature = vStringNew (); + + exception = (exception_t) setjmp (Exception); + retry = FALSE; + if (exception == ExceptionNone) + createTags (0, NULL); + else + { + deleteAllStatements (); + if (exception == ExceptionBraceFormattingError && passCount == 1) + { + retry = TRUE; + verbose ("%s: retrying file with fallback brace matching algorithm\n", + getInputFileName ()); + } + } + vStringDelete (Signature); + cppTerminate (); + return retry; +} + +static void buildKeywordHash (const langType language, unsigned int idx) +{ + const size_t count = sizeof (KeywordTable) / sizeof (KeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &KeywordTable [i]; + if (p->isValid [idx]) + addKeyword (p->name, language, (int) p->id); + } +} + +static void initializeCParser (const langType language) +{ + Lang_c = language; + buildKeywordHash 
(language, 0); +} + +static void initializeCppParser (const langType language) +{ + Lang_cpp = language; + buildKeywordHash (language, 1); +} + +static void initializeCsharpParser (const langType language) +{ + Lang_csharp = language; + buildKeywordHash (language, 2); +} + +static void initializeJavaParser (const langType language) +{ + Lang_java = language; + buildKeywordHash (language, 3); +} + +static void initializeVeraParser (const langType language) +{ + Lang_vera = language; + buildKeywordHash (language, 4); +} + +extern parserDefinition* CParser (void) +{ + static const char *const extensions [] = { "c", NULL }; + parserDefinition* def = parserNew ("C"); + def->kinds = CKinds; + def->kindCount = KIND_COUNT (CKinds); + def->extensions = extensions; + def->parser2 = findCTags; + def->initialize = initializeCParser; + return def; +} + +extern parserDefinition* CppParser (void) +{ + static const char *const extensions [] = { + "c++", "cc", "cp", "cpp", "cxx", "h", "h++", "hh", "hp", "hpp", "hxx", +#ifndef CASE_INSENSITIVE_FILENAMES + "C", "H", +#endif + NULL + }; + parserDefinition* def = parserNew ("C++"); + def->kinds = CKinds; + def->kindCount = KIND_COUNT (CKinds); + def->extensions = extensions; + def->parser2 = findCTags; + def->initialize = initializeCppParser; + return def; +} + +extern parserDefinition* CsharpParser (void) +{ + static const char *const extensions [] = { "cs", NULL }; + parserDefinition* def = parserNew ("C#"); + def->kinds = CsharpKinds; + def->kindCount = KIND_COUNT (CsharpKinds); + def->extensions = extensions; + def->parser2 = findCTags; + def->initialize = initializeCsharpParser; + return def; +} + +extern parserDefinition* JavaParser (void) +{ + static const char *const extensions [] = { "java", NULL }; + parserDefinition* def = parserNew ("Java"); + def->kinds = JavaKinds; + def->kindCount = KIND_COUNT (JavaKinds); + def->extensions = extensions; + def->parser2 = findCTags; + def->initialize = initializeJavaParser; + return def; 
+} + +extern parserDefinition* VeraParser (void) +{ + static const char *const extensions [] = { "vr", "vri", "vrh", NULL }; + parserDefinition* def = parserNew ("Vera"); + def->kinds = VeraKinds; + def->kindCount = KIND_COUNT (VeraKinds); + def->extensions = extensions; + def->parser2 = findCTags; + def->initialize = initializeVeraParser; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/cobol.c b/cobol.c new file mode 100644 index 0000000..e3cdb3e --- /dev/null +++ b/cobol.c @@ -0,0 +1,50 @@ +/* +* $Id: cobol.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for COBOL language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ +#include "parse.h" + +/* +* FUNCTION DEFINITIONS +*/ + +static void installCobolRegex (const langType language) +{ + addTagRegex (language, "^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)[ \t]+(BLANK|OCCURS|IS|JUST|PIC|REDEFINES|RENAMES|SIGN|SYNC|USAGE|VALUE)", + "\\1", "d,data,data items", "i"); + addTagRegex (language, "^[ \t]*[FSR]D[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", + "\\1", "f,file,file descriptions (FD, SD, RD)", "i"); + addTagRegex (language, "^[ \t]*[0-9]+[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", + "\\1", "g,group,group items", "i"); + addTagRegex (language, "^[ \t]*([A-Z0-9][A-Z0-9-]*)\\.", + "\\1", "p,paragraph,paragraphs", "i"); + addTagRegex (language, "^[ \t]*PROGRAM-ID\\.[ \t]+([A-Z0-9][A-Z0-9-]*)\\.", + "\\1", "P,program,program ids", "i"); + addTagRegex (language, "^[ \t]*([A-Z0-9][A-Z0-9-]*)[ \t]+SECTION\\.", + "\\1", "s,section,sections", "i"); +} + +extern parserDefinition* CobolParser () +{ + static const char *const extensions [] = { + "cbl", "cob", "CBL", "COB", NULL }; + parserDefinition* def = parserNew ("Cobol"); + def->extensions = extensions; + def->initialize = 
installCobolRegex; + def->regex = TRUE; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/config.h.in b/config.h.in new file mode 100644 index 0000000..a45375c --- /dev/null +++ b/config.h.in @@ -0,0 +1,277 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define this label if your system uses case-insensitive file names */ +#undef CASE_INSENSITIVE_FILENAMES + +/* Define this label if you wish to check the regcomp() function at run time + for correct behavior. This function is currently broken on Cygwin. */ +#undef CHECK_REGCOMP + +/* You can define this label to be a string containing the name of a + site-specific configuration file containing site-wide default options. The + files /etc/ctags.conf and /usr/local/etc/ctags.conf are already checked, so + only define one here if you need a file somewhere else. */ +#undef CUSTOM_CONFIGURATION_FILE + + +/* Define this as desired. + * 1: Original ctags format + * 2: Extended ctags format with extension flags in EX-style comment. + */ +#define DEFAULT_FILE_FORMAT 2 + + + +/* Define this label to use the system sort utility (which is probably more +* efficient) over the internal sorting algorithm. +*/ +#ifndef INTERNAL_SORT +# undef EXTERNAL_SORT +#endif + + +/* Define to 1 if you have the `chmod' function. */ +#undef HAVE_CHMOD + +/* Define to 1 if you have the `chsize' function. */ +#undef HAVE_CHSIZE + +/* Define to 1 if you have the `clock' function. */ +#undef HAVE_CLOCK + +/* Define to 1 if you have the header file. */ +#undef HAVE_DIRENT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_FCNTL_H + +/* Define to 1 if you have the `fgetpos' function. */ +#undef HAVE_FGETPOS + +/* Define to 1 if you have the `findfirst' function. */ +#undef HAVE_FINDFIRST + +/* Define to 1 if you have the `fnmatch' function. */ +#undef HAVE_FNMATCH + +/* Define to 1 if you have the header file. */ +#undef HAVE_FNMATCH_H + +/* Define to 1 if you have the `ftruncate' function. 
*/ +#undef HAVE_FTRUNCATE + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the `mkstemp' function. */ +#undef HAVE_MKSTEMP + +/* Define to 1 if you have the `opendir' function. */ +#undef HAVE_OPENDIR + +/* Define to 1 if you have the `putenv' function. */ +#undef HAVE_PUTENV + +/* Define to 1 if you have the `regcomp' function. */ +#undef HAVE_REGCOMP + +/* Define to 1 if you have the `remove' function. */ +#undef HAVE_REMOVE + +/* Define to 1 if you have the `setenv' function. */ +#undef HAVE_SETENV + +/* Define to 1 if you have the <stat.h> header file. */ +#undef HAVE_STAT_H + +/* Define this macro if the field "st_ino" exists in struct stat in + <sys/stat.h>. */ +#undef HAVE_STAT_ST_INO + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strcasecmp' function. */ +#undef HAVE_STRCASECMP + +/* Define to 1 if you have the `strerror' function. */ +#undef HAVE_STRERROR + +/* Define to 1 if you have the `stricmp' function. */ +#undef HAVE_STRICMP + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the `strncasecmp' function. */ +#undef HAVE_STRNCASECMP + +/* Define to 1 if you have the `strnicmp' function. */ +#undef HAVE_STRNICMP + +/* Define to 1 if you have the `strstr' function. */ +#undef HAVE_STRSTR + +/* Define to 1 if you have the <sys/dir.h> header file. */ +#undef HAVE_SYS_DIR_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/times.h> header file. */ +#undef HAVE_SYS_TIMES_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the `tempnam' function.
*/ +#undef HAVE_TEMPNAM + +/* Define to 1 if you have the `times' function. */ +#undef HAVE_TIMES + +/* Define to 1 if you have the <time.h> header file. */ +#undef HAVE_TIME_H + +/* Define to 1 if you have the `truncate' function. */ +#undef HAVE_TRUNCATE + +/* Define to 1 if you have the <types.h> header file. */ +#undef HAVE_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the `_findfirst' function. */ +#undef HAVE__FINDFIRST + +/* Define as the maximum integer on your system if not defined in <limits.h>. */ +#undef INT_MAX + +/* Define to the appropriate size for tmpnam() if <stdio.h> does not define + this. */ +#undef L_tmpnam + +/* Define this label if you want macro tags (defined labels) to use patterns + in the EX command by default (original ctags behavior is to use line + numbers). */ +#undef MACROS_USE_PATTERNS + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +#undef NEED_PROTO_FGETPOS + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +#undef NEED_PROTO_FTRUNCATE + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +#undef NEED_PROTO_GETENV + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +#undef NEED_PROTO_LSTAT + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake.
*/ +#undef NEED_PROTO_MALLOC + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +#undef NEED_PROTO_REMOVE + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +#undef NEED_PROTO_STAT + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +#undef NEED_PROTO_TRUNCATE + +/* If you receive error or warning messages indicating that you are missing a + prototype for, or a type mismatch using, the following function, define + this label and remake. */ +#undef NEED_PROTO_UNLINK + +/* Define this if you have a prototype for putenv() in <stdlib.h>, but it doesn't + declare its argument as "const char *". */ +#undef NON_CONST_PUTENV_PROTOTYPE + +/* Package name. */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define this label if regcomp() is broken. */ +#undef REGCOMP_BROKEN + +/* Define this value used by fseek() appropriately if <stdio.h> (or + <unistd.h> on SunOS 4.1.x) does not define them. */ +#undef SEEK_SET + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define this label if your system supports starting scripts with a line of + the form "#! /bin/sh" to select the interpreter to use for the script.
*/ +#undef SYS_INTERPRETER + +/* If you wish to change the directory in which temporary files are stored, + define this label to the directory desired. */ +#undef TMPDIR + +/* Package version. */ +#undef VERSION + +/* This corrects the problem of missing prototypes for certain functions in + some GNU installations (e.g. SunOS 4.1.x). */ +#undef __USE_FIXED_PROTOTYPES__ + +/* Define to the appropriate type if does not define this. */ +#undef clock_t + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const + +/* Define to long if does not define this. */ +#undef fpos_t + +/* Define to `long int' if does not define. */ +#undef off_t + +/* Define remove to unlink if you have unlink(), but not remove(). */ +#undef remove + +/* Define to `unsigned int' if does not define. */ +#undef size_t diff --git a/configure b/configure new file mode 100755 index 0000000..f137cf1 --- /dev/null +++ b/configure @@ -0,0 +1,7704 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.61. +# +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +# 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + + + +# PATH needs CR +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +as_nl=' +' +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + { (exit 1); exit 1; } +fi + +# Work around bugs in pre-3.0 UWIN ksh. +for as_var in ENV MAIL MAILPATH +do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. 
+for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# CDPATH. +$as_unset CDPATH + + +if test "x$CONFIG_SHELL" = x; then + if (eval ":") 2>/dev/null; then + as_have_required=yes +else + as_have_required=no +fi + + if test $as_have_required = yes && (eval ": +(as_func_return () { + (exit \$1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = \"\$1\" ); then + : +else + exitcode=1 + echo positional parameters were not saved. 
+fi + +test \$exitcode = 0) || { (exit 1); exit 1; } + +( + as_lineno_1=\$LINENO + as_lineno_2=\$LINENO + test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" && + test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; } +") 2> /dev/null; then + : +else + as_candidate_shells= + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + case $as_dir in + /*) + for as_base in sh bash ksh sh5; do + as_candidate_shells="$as_candidate_shells $as_dir/$as_base" + done;; + esac +done +IFS=$as_save_IFS + + + for as_shell in $as_candidate_shells $SHELL; do + # Try only shells that exist, to save several forks. + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { ("$as_shell") 2> /dev/null <<\_ASEOF +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + +: +_ASEOF +}; then + CONFIG_SHELL=$as_shell + as_have_required=yes + if { "$as_shell" 2> /dev/null <<\_ASEOF +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + +: +(as_func_return () { + (exit $1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. 
+fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = "$1" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test $exitcode = 0) || { (exit 1); exit 1; } + +( + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; } + +_ASEOF +}; then + break +fi + +fi + + done + + if test "x$CONFIG_SHELL" != x; then + for as_var in BASH_ENV ENV + do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + done + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} +fi + + + if test $as_have_required = no; then + echo This script requires a shell more modern than all the + echo shells that I found on your system. Please install a + echo modern shell, or manually run the script under such a + echo shell if you do have one. + { (exit 1); exit 1; } +fi + + +fi + +fi + + + +(eval "as_func_return () { + (exit \$1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = \"\$1\" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test \$exitcode = 0") || { + echo No shell found that supports shell functions. 
+ echo Please tell autoconf@gnu.org about your system, + echo including any error possibly output before this + echo message +} + + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line after each line using $LINENO; the second 'sed' + # does the real work. The second script uses 'N' to pair each + # line-number line with the line containing $LINENO, and appends + # trailing '-' during substitution so that $LINENO is not a special + # case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # scripts with optimization help from Paolo Bonzini. Blame Lee + # E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in +-n*) + case `echo 'x\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
+ *) ECHO_C='\c';; + esac;; +*) + ECHO_N='-n';; +esac + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir +fi +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + + +exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. 
+LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Identity of this package. +PACKAGE_NAME= +PACKAGE_TARNAME= +PACKAGE_VERSION= +PACKAGE_STRING= +PACKAGE_BUGREPORT= + +ac_unique_file="ctags.h" +ac_header_list= +# Factoring default headers for most tests. +ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='SHELL +PATH_SEPARATOR +PACKAGE_NAME +PACKAGE_TARNAME +PACKAGE_VERSION +PACKAGE_STRING +PACKAGE_BUGREPORT +exec_prefix +prefix +program_transform_name +bindir +sbindir +libexecdir +datarootdir +datadir +sysconfdir +sharedstatedir +localstatedir +includedir +oldincludedir +docdir +infodir +htmldir +dvidir +pdfdir +psdir +libdir +localedir +mandir +DEFS +ECHO_C +ECHO_N +ECHO_T +LIBS +build_alias +host_alias +target_alias +install_targets +CC +CFLAGS +LDFLAGS +CPPFLAGS +ac_ct_CC +EXEEXT +OBJEXT +LN_S +STRIP +sort_found +CPP +GREP +EGREP +LIBOBJS +LTLIBOBJS' +ac_subst_files='' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. 
+# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. 
+ expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'` + eval enable_$ac_feature=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'` + eval enable_$ac_feature=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/[-.]/_/g'` + eval with_$ac_package=\$ac_optarg ;; + + -without-* | --without-*) + ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/[-.]/_/g'` + eval with_$ac_package=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +# Be sure to have absolute directory names. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; } +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used." >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + { echo "$as_me: error: Working directory cannot be determined" >&2 + { (exit 1); exit 1; }; } +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + { echo "$as_me: error: pwd does not report name of working directory" >&2 + { (exit 1); exit 1; }; } + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$0" || +$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$0" : 'X\(//\)[^/]' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 
2>/dev/null || +echo X"$0" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 + { (exit 1); exit 1; }; } +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || { echo "$as_me: error: $ac_msg" >&2 + { (exit 1); exit 1; }; } + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures this package to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + + cat <<\_ACEOF + +Optional Features: + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-etags enable the installation of links for etags + --disable-extended-format + disable extension flags; use original ctags file + format only + --disable-external-sort use internal sort algorithm instead of sort program + --enable-custom-config=FILE + enable custom config file for site-wide defaults + --enable-macro-patterns use patterns as default method to locate macros + instead of line numbers + --enable-maintainer-mode + use maintainer makefile + --enable-shell-globbing=DIR + does shell expand wildcards (yes|no)? 
yes + --enable-tmpdir=DIR default directory for temporary files ARG=/tmp + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-posix-regex use Posix regex interface, if available + --with-readlib include readtags library object during install + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. 
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +configure +generated by GNU Autoconf 2.61 + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by $as_me, which was +generated by GNU Autoconf 2.61. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + echo "PATH: $as_dir" +done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; + 2) + ac_configure_args1="$ac_configure_args1 '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + ac_configure_args="$ac_configure_args '$ac_arg'" + ;; + esac + done +done +$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } +$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + cat <<\_ASBOX +## ---------------- ## +## Cache variables. 
## +## ---------------- ## +_ASBOX + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 +echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + *) $as_unset $ac_var ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + cat <<\_ASBOX +## ----------------- ## +## Output variables. ## +## ----------------- ## +_ASBOX + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + cat <<\_ASBOX +## ------------------- ## +## File substitutions. ## +## ------------------- ## +_ASBOX + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + cat <<\_ASBOX +## ----------- ## +## confdefs.h. 
## +## ----------- ## +_ASBOX + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + echo "$as_me: caught signal $ac_signal" + echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer explicitly selected file to automatically selected ones. +if test -n "$CONFIG_SITE"; then + set x "$CONFIG_SITE" +elif test "x$prefix" != xNONE; then + set x "$prefix/share/config.site" "$prefix/etc/config.site" +else + set x "$ac_default_prefix/share/config.site" \ + "$ac_default_prefix/etc/config.site" +fi +shift +for ac_site_file +do + if test -r "$ac_site_file"; then + { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 +echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special + # files actually), so we avoid doing that. + if test -f "$cache_file"; then + { echo "$as_me:$LINENO: loading cache $cache_file" >&5 +echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . 
"./$cache_file";; + esac + fi +else + { echo "$as_me:$LINENO: creating cache $cache_file" >&5 +echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +ac_header_list="$ac_header_list dirent.h" +ac_header_list="$ac_header_list fcntl.h" +ac_header_list="$ac_header_list fnmatch.h" +ac_header_list="$ac_header_list stat.h" +ac_header_list="$ac_header_list stdlib.h" +ac_header_list="$ac_header_list string.h" +ac_header_list="$ac_header_list time.h" +ac_header_list="$ac_header_list types.h" +ac_header_list="$ac_header_list unistd.h" +ac_header_list="$ac_header_list sys/dir.h" +ac_header_list="$ac_header_list sys/stat.h" +ac_header_list="$ac_header_list sys/times.h" +ac_header_list="$ac_header_list sys/types.h" +# Check that the precious variables saved in the cache have kept the same +# value. +ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 +echo "$as_me: former value: $ac_old_val" >&2;} + { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 +echo "$as_me: current value: $ac_new_val" >&2;} + ac_cache_corrupted=: + fi;; + esac + # Pass precious variables to 
config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. + *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + + + + + + + + + + + + + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +ac_config_headers="$ac_config_headers config.h" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# Report system info +# ------------------ +program_name=`grep 'PROGRAM_NAME *"' ctags.h | sed -e 's/.*"\([^"]*\)".*/\1/'` +program_version=`grep 'PROGRAM_VERSION *"' ctags.h | sed -e 's/.*"\([^"]*\)".*/\1/'` +echo "$program_name, version $program_version" +uname -mrsv 2>/dev/null + +# Define convenience macros +# ------------------------- +# CHECK_HEADER_DEFINE(LABEL, HEADER [,ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND ] ]) + + +# Checks for configuration options +# -------------------------------- + + +# Check whether --with-posix-regex was given. +if test "${with_posix_regex+set}" = set; then + withval=$with_posix_regex; +fi + + + +# Check whether --with-readlib was given. 
+if test "${with_readlib+set}" = set; then + withval=$with_readlib; +fi + + +# AC_ARG_WITH(perl-regex, +# [ --with-perl-regex use Perl pcre interface, if available]) + +# Check whether --enable-etags was given. +if test "${enable_etags+set}" = set; then + enableval=$enable_etags; +fi + + +# Check whether --enable-extended-format was given. +if test "${enable_extended_format+set}" = set; then + enableval=$enable_extended_format; cat >>confdefs.h <<\_ACEOF +#define DEFAULT_FILE_FORMAT 1 +_ACEOF + +else + cat >>confdefs.h <<\_ACEOF +#define DEFAULT_FILE_FORMAT 2 +_ACEOF + +fi + + +# Check whether --enable-external-sort was given. +if test "${enable_external_sort+set}" = set; then + enableval=$enable_external_sort; +fi + + +# Check whether --enable-custom-config was given. +if test "${enable_custom_config+set}" = set; then + enableval=$enable_custom_config; +fi + + +# Check whether --enable-macro-patterns was given. +if test "${enable_macro_patterns+set}" = set; then + enableval=$enable_macro_patterns; +fi + + +# Check whether --enable-maintainer-mode was given. +if test "${enable_maintainer_mode+set}" = set; then + enableval=$enable_maintainer_mode; +fi + + +# Check whether --enable-shell-globbing was given. +if test "${enable_shell_globbing+set}" = set; then + enableval=$enable_shell_globbing; +fi + + +# Check whether --enable-tmpdir was given. +if test "${enable_tmpdir+set}" = set; then + enableval=$enable_tmpdir; tmpdir_specified=yes +fi + + + +# Process configuration options +# ----------------------------- + +if test "$enable_maintainer_mode" = yes ; then + { echo "$as_me:$LINENO: result: enabling maintainer mode" >&5 +echo "${ECHO_T}enabling maintainer mode" >&6; } +fi + +install_targets="install-ctags" +{ echo "$as_me:$LINENO: checking whether to install link to etags" >&5 +echo $ECHO_N "checking whether to install link to etags... 
$ECHO_C" >&6; } +if test yes = "$enable_etags"; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + install_targets="$install_targets install-etags" +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi +{ echo "$as_me:$LINENO: checking whether to install readtags object file" >&5 +echo $ECHO_N "checking whether to install readtags object file... $ECHO_C" >&6; } +if test yes = "$with_readlib"; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + install_targets="$install_targets install-lib" +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$enable_custom_config" = no -o "$enable_custom_config" = yes ; then + { echo "$as_me:$LINENO: result: no name supplied for custom configuration file" >&5 +echo "${ECHO_T}no name supplied for custom configuration file" >&6; } +elif test -n "$enable_custom_config" ; then + cat >>confdefs.h <<_ACEOF +#define CUSTOM_CONFIGURATION_FILE "$enable_custom_config" +_ACEOF + + { echo "$as_me:$LINENO: result: $enable_custom_config will be used as custom configuration file" >&5 +echo "${ECHO_T}$enable_custom_config will be used as custom configuration file" >&6; } +fi + +if test "$enable_macro_patterns" = yes ; then + cat >>confdefs.h <<\_ACEOF +#define MACROS_USE_PATTERNS 1 +_ACEOF + + { echo "$as_me:$LINENO: result: tag file will use patterns for macros by default" >&5 +echo "${ECHO_T}tag file will use patterns for macros by default" >&6; } +fi + +# Checks for programs +# ------------------- + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. 
+set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. +echo "$as_me:$LINENO: checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (ac_try="$ac_compiler --version >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler --version >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -v >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -v >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -V >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -V >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 +echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6; } +ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +# +# List of possible output files, starting from the most likely. +# The algorithm is not robust to junk in `.', hence go to wildcards (a.*) +# only as a last resort. b.out is created by i960 compilers. +ac_files='a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out' +# +# The IRIX 6 linker writes into existing files which may not be +# executable, retaining their permissions. Remove them first so a +# subsequent execution test works. 
+ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { (ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. +for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi + +{ echo "$as_me:$LINENO: result: $ac_file" >&5 +echo "${ECHO_T}$ac_file" >&6; } +if test -z "$ac_file"; then + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: C compiler cannot create executables +See \`config.log' for more details." 
>&5 +echo "$as_me: error: C compiler cannot create executables +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; } +fi + +ac_exeext=$ac_cv_exeext + +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6; } +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { echo "$as_me:$LINENO: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + fi + fi +fi +{ echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +rm -f a.out a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6; } +{ echo "$as_me:$LINENO: result: $cross_compiling" >&5 +echo "${ECHO_T}$cross_compiling" >&6; } + +{ echo "$as_me:$LINENO: checking for suffix of executables" >&5 +echo $ECHO_N "checking for suffix of executables... 
$ECHO_C" >&6; } +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest$ac_cv_exeext +{ echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +echo "${ECHO_T}$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +{ echo "$as_me:$LINENO: checking for suffix of object files" >&5 +echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6; } +if test "${ac_cv_objext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +echo "${ECHO_T}$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6; } +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_compiler_gnu=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6; } +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6; } +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + CFLAGS="" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 +echo $ECHO_N "checking for $CC option to accept ISO C89... $ECHO_C" >&6; } +if test "${ac_cv_prog_cc_c89+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. 
It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_c89=$ac_arg +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6; } ;; + xno) + { echo "$as_me:$LINENO: result: unsupported" >&5 +echo "${ECHO_T}unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;; +esac + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +case `uname` in + HP-UX) + { echo "$as_me:$LINENO: checking HP-UX native compiler" >&5 +echo $ECHO_N "checking HP-UX native compiler... $ECHO_C" >&6; } + if test "$CC" = "cc"; then + { echo "$as_me:$LINENO: result: yes; adding compiler options for ANSI support" >&5 +echo "${ECHO_T}yes; adding compiler options for ANSI support" >&6; } + CFLAGS="$CFLAGS -Aa -D_HPUX_SOURCE" + else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + fi + ;; + SunOS) + if uname -r | grep '5\.' >/dev/null 2>&1; then + { echo "$as_me:$LINENO: checking Solaris native compiler" >&5 +echo $ECHO_N "checking Solaris native compiler... 
$ECHO_C" >&6; } + if test "$CC" = "cc" -a "`which cc`" = "/usr/ucb/cc"; then + { echo "$as_me:$LINENO: result: yes; adding compiler option for ANSI support" >&5 +echo "${ECHO_T}yes; adding compiler option for ANSI support" >&6; } + CC="$CC -Xa" + else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + fi + fi + ;; +esac + +{ echo "$as_me:$LINENO: checking whether ln -s works" >&5 +echo $ECHO_N "checking whether ln -s works... $ECHO_C" >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } +else + { echo "$as_me:$LINENO: result: no, using $LN_S" >&5 +echo "${ECHO_T}no, using $LN_S" >&6; } +fi + +# Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_STRIP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_STRIP="strip" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_STRIP" && ac_cv_prog_STRIP=":" +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { echo "$as_me:$LINENO: result: $STRIP" >&5 +echo "${ECHO_T}$STRIP" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + + +# Checks for operating environment +# -------------------------------- + +# Check for temporary directory +{ echo "$as_me:$LINENO: checking directory to use for temporary files" >&5 +echo $ECHO_N "checking directory to use for temporary files... $ECHO_C" >&6; } +if test -n "$enable_tmpdir"; then + tmpdir="$enable_tmpdir" +elif test -n "$TMPDIR"; then + tmpdir="$TMPDIR" +elif test -n "$TMPDIR"; then + tmpdir="$TMPDIR" +elif test -n "$TMP"; then + tmpdir="$TMP" +elif test -n "$TEMP"; then + tmpdir="$TEMP" +elif test -d "c:/"; then + tmpdir="c:/" +else + tmpdir="/tmp" +fi +if test -d $tmpdir ; then + { echo "$as_me:$LINENO: result: $tmpdir" >&5 +echo "${ECHO_T}$tmpdir" >&6; } + cat >>confdefs.h <<_ACEOF +#define TMPDIR "$tmpdir" +_ACEOF + +else + { { echo "$as_me:$LINENO: error: $tmpdir does not exist" >&5 +echo "$as_me: error: $tmpdir does not exist" >&2;} + { (exit 1); exit 1; }; } +fi + +# Check whether system supports #! scripts +{ echo "$as_me:$LINENO: checking whether #! works in shell scripts" >&5 +echo $ECHO_N "checking whether #! works in shell scripts... $ECHO_C" >&6; } +if test "${ac_cv_sys_interpreter+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + echo '#! /bin/cat +exit 69 +' >conftest +chmod u+x conftest +(SHELL=/bin/sh; export SHELL; ./conftest >/dev/null 2>&1) +if test $? 
-ne 69; then + ac_cv_sys_interpreter=yes +else + ac_cv_sys_interpreter=no +fi +rm -f conftest +fi +{ echo "$as_me:$LINENO: result: $ac_cv_sys_interpreter" >&5 +echo "${ECHO_T}$ac_cv_sys_interpreter" >&6; } +interpval=$ac_cv_sys_interpreter + +if test yes = "$interpval"; then + cat >>confdefs.h <<\_ACEOF +#define SYS_INTERPRETER 1 +_ACEOF + +fi + +# Test for case-insensitive filenames +{ echo "$as_me:$LINENO: checking for case-insensitive filenames" >&5 +echo $ECHO_N "checking for case-insensitive filenames... $ECHO_C" >&6; } +touch conftest.cif +if test -f CONFTEST.CIF; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + cat >>confdefs.h <<\_ACEOF +#define CASE_INSENSITIVE_FILENAMES 1 +_ACEOF + +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi +rm -f conftest.cif + +{ echo "$as_me:$LINENO: checking selected sort method" >&5 +echo $ECHO_N "checking selected sort method... $ECHO_C" >&6; } +if test no = "$enable_external_sort"; then + { echo "$as_me:$LINENO: result: simple internal algorithm" >&5 +echo "${ECHO_T}simple internal algorithm" >&6; } +else + { echo "$as_me:$LINENO: result: external sort utility" >&5 +echo "${ECHO_T}external sort utility" >&6; } + enable_external_sort=no + # Extract the first word of "sort", so it can be a program name with args. +set dummy sort; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_sort_found+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$sort_found"; then + ac_cv_prog_sort_found="$sort_found" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_sort_found="yes" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_sort_found" && ac_cv_prog_sort_found="no" +fi +fi +sort_found=$ac_cv_prog_sort_found +if test -n "$sort_found"; then + { echo "$as_me:$LINENO: result: $sort_found" >&5 +echo "${ECHO_T}$sort_found" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + if test "$sort_found" = yes ; then + { echo "$as_me:$LINENO: checking if sort accepts our command line" >&5 +echo $ECHO_N "checking if sort accepts our command line... $ECHO_C" >&6; } + touch ${tmpdir}/sort.test + sort -u -f -o ${tmpdir}/sort.test ${tmpdir}/sort.test 1>/dev/null 2>&1 + if test $? -ne 0 ; then + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + else + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + cat >>confdefs.h <<\_ACEOF +#define EXTERNAL_SORT 1 +_ACEOF + + enable_external_sort=yes + fi + rm -f ${tmpdir}/sort.test + fi +fi +if test "$enable_external_sort" != yes ; then + { echo "$as_me:$LINENO: result: using internal sort algorithm as fallback" >&5 +echo "${ECHO_T}using internal sort algorithm as fallback" >&6; } +fi + + +# Checks for header files +# ----------------------- + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6; } +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ echo "$as_me:$LINENO: result: $CPP" >&5 +echo "${ECHO_T}$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + : +else + { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&5 +echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5 +echo $ECHO_N "checking for grep that handles long lines and -e... $ECHO_C" >&6; } +if test "${ac_cv_path_GREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Extract the first word of "grep ggrep" to use in msg output +if test -z "$GREP"; then +set dummy grep ggrep; ac_prog_name=$2 +if test "${ac_cv_path_GREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_path_GREP_found=false +# Loop through the user's path and test for each of PROGNAME-LIST +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue + # Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + ac_count=`expr $ac_count + 1` + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + + $ac_path_GREP_found && break 3 + done +done + +done +IFS=$as_save_IFS + + +fi + +GREP="$ac_cv_path_GREP" +if test -z "$GREP"; then + { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 +echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} + { (exit 1); exit 1; }; } +fi + +else + ac_cv_path_GREP=$GREP +fi + + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5 +echo "${ECHO_T}$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ echo "$as_me:$LINENO: checking for egrep" >&5 +echo $ECHO_N "checking for egrep... 
$ECHO_C" >&6; } +if test "${ac_cv_path_EGREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + # Extract the first word of "egrep" to use in msg output +if test -z "$EGREP"; then +set dummy egrep; ac_prog_name=$2 +if test "${ac_cv_path_EGREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_path_EGREP_found=false +# Loop through the user's path and test for each of PROGNAME-LIST +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue + # Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + ac_count=`expr $ac_count + 1` + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + + $ac_path_EGREP_found && break 3 + done +done + +done +IFS=$as_save_IFS + + +fi + +EGREP="$ac_cv_path_EGREP" +if test -z "$EGREP"; then + { { echo "$as_me:$LINENO: error: no acceptable 
$ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 +echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} + { (exit 1); exit 1; }; } +fi + +else + ac_cv_path_EGREP=$EGREP +fi + + + fi +fi +{ echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5 +echo "${ECHO_T}$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5 +echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; } +if test "${ac_cv_header_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_header_stdc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_header_stdc=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include <string.h> + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <stdlib.h> + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then + : +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <ctype.h> +#include <stdlib.h> +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi +fi +{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 +echo "${ECHO_T}$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +cat >>confdefs.h <<\_ACEOF +#define STDC_HEADERS 1 +_ACEOF + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. + + + + + + + + + +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_Header=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + + + +for ac_header in $ac_header_list +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# Checks for header file macros +# ----------------------------- + + + { echo "$as_me:$LINENO: checking if L_tmpnam is defined in stdio.h" >&5 +echo $ECHO_N "checking if L_tmpnam is defined in stdio.h... $ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#ifdef L_tmpnam + yes +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "yes" >/dev/null 2>&1; then + + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + + +else + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + cat >>confdefs.h <<\_ACEOF +#define L_tmpnam 20 +_ACEOF + + +fi +rm -f conftest* + + + + { echo "$as_me:$LINENO: checking if INT_MAX is defined in limits.h" >&5 +echo $ECHO_N "checking if INT_MAX is defined in limits.h... $ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include <limits.h> +#ifdef INT_MAX + yes +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "yes" >/dev/null 2>&1; then + + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + + +else + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + + { echo "$as_me:$LINENO: checking if MAXINT is defined in limits.h" >&5 +echo $ECHO_N "checking if MAXINT is defined in limits.h... $ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <limits.h> +#ifdef MAXINT + yes +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "yes" >/dev/null 2>&1; then + + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + cat >>confdefs.h <<\_ACEOF +#define INT_MAX MAXINT +_ACEOF + + +else + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + cat >>confdefs.h <<\_ACEOF +#define INT_MAX 32767 +_ACEOF + + +fi +rm -f conftest* + + +fi +rm -f conftest* + + + +# Checks for typedefs +# ------------------- + +{ echo "$as_me:$LINENO: checking for size_t" >&5 +echo $ECHO_N "checking for size_t... $ECHO_C" >&6; } +if test "${ac_cv_type_size_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +typedef size_t ac__type_new_; +int +main () +{ +if ((ac__type_new_ *) 0) + return 0; +if (sizeof (ac__type_new_)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_type_size_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_type_size_t=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_type_size_t" >&5 +echo "${ECHO_T}$ac_cv_type_size_t" >&6; } +if test $ac_cv_type_size_t = yes; then + : +else + +cat >>confdefs.h <<_ACEOF +#define size_t unsigned int +_ACEOF + +fi + +{ echo "$as_me:$LINENO: checking for off_t" >&5 +echo $ECHO_N "checking for off_t... $ECHO_C" >&6; } +if test "${ac_cv_type_off_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +typedef off_t ac__type_new_; +int +main () +{ +if ((ac__type_new_ *) 0) + return 0; +if (sizeof (ac__type_new_)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_type_off_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_type_off_t=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_type_off_t" >&5 +echo "${ECHO_T}$ac_cv_type_off_t" >&6; } +if test $ac_cv_type_off_t = yes; then + : +else + +cat >>confdefs.h <<_ACEOF +#define off_t long int +_ACEOF + +fi + + +{ echo "$as_me:$LINENO: checking for fpos_t" >&5 +echo $ECHO_N "checking for fpos_t... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "fpos_t" >/dev/null 2>&1; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } +else + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + cat >>confdefs.h <<\_ACEOF +#define fpos_t long +_ACEOF + + +fi +rm -f conftest* + + +{ echo "$as_me:$LINENO: checking for clock_t" >&5 +echo $ECHO_N "checking for clock_t... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "clock_t" >/dev/null 2>&1; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } +else + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + cat >>confdefs.h <<\_ACEOF +#define clock_t long +_ACEOF + + +fi +rm -f conftest* + + + +# Checks for compiler characteristics +# ----------------------------------- + +# AC_CYGWIN +# AC_MINGW32 +{ echo "$as_me:$LINENO: checking for an ANSI C-conforming const" >&5 +echo $ECHO_N "checking for an ANSI C-conforming const... 
$ECHO_C" >&6; } +if test "${ac_cv_c_const+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +/* FIXME: Include the comments suggested by Paul. */ +#ifndef __cplusplus + /* Ultrix mips cc rejects this. */ + typedef int charset[2]; + const charset cs; + /* SunOS 4.1.1 cc rejects this. */ + char const *const *pcpcc; + char **ppc; + /* NEC SVR4.0.2 mips cc rejects this. */ + struct point {int x, y;}; + static struct point const zero = {0,0}; + /* AIX XL C 1.02.0.0 rejects this. + It does not let you subtract one const X* pointer from another in + an arm of an if-expression whose if-part is not a constant + expression */ + const char *g = "string"; + pcpcc = &g + (g ? g-g : 0); + /* HPUX 7.0 cc rejects these. */ + ++pcpcc; + ppc = (char**) pcpcc; + pcpcc = (char const *const *) ppc; + { /* SCO 3.2v4 cc rejects this. */ + char *t; + char const *s = 0 ? (char *) 0 : (char const *) 0; + + *t++ = 0; + if (s) return 0; + } + { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ + int x[] = {25, 17}; + const int *foo = &x[0]; + ++foo; + } + { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ + typedef const int *iptr; + iptr p = 0; + ++p; + } + { /* AIX XL C 1.02.0.0 rejects this saying + "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */ + struct s { int j; const int *ap[3]; }; + struct s *b; b->j = 5; + } + { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ + const int foo = 10; + if (!foo) return 0; + } + return !cs[0] && !zero.x; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_c_const=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_c_const=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_const" >&5 +echo "${ECHO_T}$ac_cv_c_const" >&6; } +if test $ac_cv_c_const = no; then + +cat >>confdefs.h <<\_ACEOF +#define const +_ACEOF + +fi + + + + +{ echo "$as_me:$LINENO: checking if struct stat contains st_ino" >&5 +echo $ECHO_N "checking if struct stat contains st_ino... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ + + struct stat st; + stat(".", &st); + if (st.st_ino > 0) + exit(0); + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + have_st_ino=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + have_st_ino=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $have_st_ino" >&5 +echo "${ECHO_T}$have_st_ino" >&6; } +if test yes = "$have_st_ino"; then + cat >>confdefs.h <<\_ACEOF +#define HAVE_STAT_ST_INO 1 +_ACEOF + +fi + + +# Checks for library functions +# ---------------------------- + + +for ac_func in fnmatch +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + +for ac_func in strstr +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. 
+ Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + +for ac_func in strcasecmp stricmp +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + break +fi +done + + + +for ac_func in strncasecmp strnicmp +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. 
+ Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + break +fi +done + + +for ac_func in fgetpos +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + have_fgetpos=yes +fi +done + + +# SEEK_SET should be in stdio.h, but may be in unistd.h on SunOS 4.1.x +if test "$have_fgetpos" != yes ; then + + { echo "$as_me:$LINENO: checking if SEEK_SET is defined in stdio.h" >&5 +echo $ECHO_N "checking if SEEK_SET is defined in stdio.h... $ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +#ifdef SEEK_SET + yes +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "yes" >/dev/null 2>&1; then + + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + + +else + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + + { echo "$as_me:$LINENO: checking if SEEK_SET is defined in unistd.h" >&5 +echo $ECHO_N "checking if SEEK_SET is defined in unistd.h... $ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#ifdef SEEK_SET + yes +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "yes" >/dev/null 2>&1; then + + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + + +else + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + cat >>confdefs.h <<\_ACEOF +#define SEEK_SET 0 +_ACEOF + + +fi +rm -f conftest* + + +fi +rm -f conftest* + +fi + + +for ac_func in mkstemp +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + have_mkstemp=yes +fi +done + +if test "$have_mkstemp" != yes ; then + +for ac_func in tempnam +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... 
$ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + have_tempnam=yes +fi +done + +fi +if test "$have_mkstemp" != yes -a "$have_tempnam" != yes; then + +for ac_func in chmod +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. 
Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + if test "$tmpdir_specified" = yes ; then + { echo "$as_me:$LINENO: result: use of tmpnam overrides temporary directory selection" >&5 +echo "${ECHO_T}use of tmpnam overrides temporary directory selection" >&6; } + fi +fi + + + + +for ac_func in opendir findfirst _findfirst +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + break +fi +done + + +for ac_func in strerror +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + +for ac_func in clock times +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. 
+ Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + break +fi +done + + +for ac_func in remove +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + have_remove=yes +else + + { echo "$as_me:$LINENO: checking if remove is defined in unistd.h" >&5 +echo $ECHO_N "checking if remove is defined in unistd.h... $ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +#ifdef remove + yes +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "yes" >/dev/null 2>&1; then + + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + + +else + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + cat >>confdefs.h <<\_ACEOF +#define remove unlink +_ACEOF + + +fi +rm -f conftest* + +fi +done + + + +for ac_func in truncate +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + have_truncate=yes +fi +done + +# === Cannot nest AC_CHECK_FUNCS() calls +if test "$have_truncate" != yes ; then + +for ac_func in ftruncate +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. 
*/ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + have_ftruncate=yes +fi +done + + if test "$have_ftruncate" != yes ; then + +for ac_func in chsize +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. 
Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + fi +fi + + +for ac_func in setenv +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. 
*/ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + have_setenv=yes +fi +done + +# === Cannot nest AC_CHECK_FUNCS() calls +if test "$have_setenv" != yes ; then + +for ac_func in putenv +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. 
Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + have_putenv=yes +fi +done + + if test "$have_putenv" = yes ; then + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "putenv" >/dev/null 2>&1; then + have_putenv_prototype=yes +fi +rm -f conftest* + + if test "$have_putenv_prototype" = yes ; then + { echo "$as_me:$LINENO: checking putenv prototype" >&5 +echo $ECHO_N "checking putenv prototype... $ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "[^A-Za-zo-9_]putenv[ ]*\(.*const.*\)[ ]*;" >/dev/null 2>&1; then + { echo "$as_me:$LINENO: result: correct" >&5 +echo "${ECHO_T}correct" >&6; } +else + + { echo "$as_me:$LINENO: result: no const" >&5 +echo "${ECHO_T}no const" >&6; } + cat >>confdefs.h <<\_ACEOF +#define NON_CONST_PUTENV_PROTOTYPE 1 +_ACEOF + + +fi +rm -f conftest* + + fi + fi +fi + +# +# if test yes = "$CYGWIN"; then with_posix_regex=no; fi +if test no != "$with_posix_regex"; then + +for ac_func in regcomp +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && + $as_test_x conftest$ac_exeext; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + { echo "$as_me:$LINENO: checking if regcomp works" >&5 +echo $ECHO_N "checking if regcomp works... $ECHO_C" >&6; } + if test "$cross_compiling" = yes; then + cat >>confdefs.h <<\_ACEOF +#define CHECK_REGCOMP 1 +_ACEOF + +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +#include +#include +int main(void) { + regex_t patbuf; + return regcomp (&patbuf, "/hello/", 0) != 0; /* return, not exit(): exit has no declaration in this program */ +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + regcomp_works=yes +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +regcomp_works=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + + { echo "$as_me:$LINENO: result: $regcomp_works" >&5 +echo "${ECHO_T}$regcomp_works" >&6; } + if test yes != "$regcomp_works"; then + cat >>confdefs.h <<\_ACEOF +#define REGCOMP_BROKEN 1 +_ACEOF + + fi +fi + +# if test yes = "$with_perl_regex"; then +# AC_MSG_CHECKING(for Perl regex library) +# pcre_candidates="$with_perl_regex $HOME/local/lib* /usr*/local/lib* /usr/lib*" +# for lib in $pcre_candidates; do +# if test -f $lib/libpcreposix.so; then +# pcre_lib="-L$lib -lpcreposix" +# break +# elif test -f $lib/libpcreposix.a; then +# pcre_lib="$lib/libpcreposix.a" +# break +# fi +# done +# if test -z "$pcre_lib"; then +# AC_MSG_RESULT(not found) +# else +# AC_MSG_RESULT($lib) +# AC_DEFINE(HAVE_REGCOMP) +# LDFLAGS="$LDFLAGS $pcre_lib" +# have_regex=yes +# fi +# fi + + +# Checks for missing prototypes +# ----------------------------- +{ echo "$as_me:$LINENO: checking for new missing prototypes..." >&5 +echo "$as_me: checking for new missing prototypes..." >&6;} + + + +if test "$have_remove" = yes ; then + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "[^A-Za-z0-9_]remove([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then + : +else + + { echo "$as_me:$LINENO: result: adding prototype for remove" >&5 +echo "${ECHO_T}adding prototype for remove" >&6; } + cat >>confdefs.h <<\_ACEOF +#define NEED_PROTO_REMOVE 1 +_ACEOF + + +fi +rm -f conftest* + +else + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "[^A-Za-z0-9_]unlink([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then + : +else + + { echo "$as_me:$LINENO: result: adding prototype for unlink" >&5 +echo "${ECHO_T}adding prototype for unlink" >&6; } + cat >>confdefs.h <<\_ACEOF +#define NEED_PROTO_UNLINK 1 +_ACEOF + + +fi +rm -f conftest* + +fi + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "[^A-Za-z0-9_]malloc([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then + : +else + + { echo "$as_me:$LINENO: result: adding prototype for malloc" >&5 +echo "${ECHO_T}adding prototype for malloc" >&6; } + cat >>confdefs.h <<\_ACEOF +#define NEED_PROTO_MALLOC 1 +_ACEOF + + +fi +rm -f conftest* + + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "[^A-Za-z0-9_]getenv([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then + : +else + + { echo "$as_me:$LINENO: result: adding prototype for getenv" >&5 +echo "${ECHO_T}adding prototype for getenv" >&6; } + cat >>confdefs.h <<\_ACEOF +#define NEED_PROTO_GETENV 1 +_ACEOF + + +fi +rm -f conftest* + + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "[^A-Za-z0-9_]stat([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then + : +else + + { echo "$as_me:$LINENO: result: adding prototype for stat" >&5 +echo "${ECHO_T}adding prototype for stat" >&6; } + cat >>confdefs.h <<\_ACEOF +#define NEED_PROTO_STAT 1 +_ACEOF + + +fi +rm -f conftest* + + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "[^A-Za-z0-9_]lstat([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then + : +else + + { echo "$as_me:$LINENO: result: adding prototype for lstat" >&5 +echo "${ECHO_T}adding prototype for lstat" >&6; } + cat >>confdefs.h <<\_ACEOF +#define NEED_PROTO_LSTAT 1 +_ACEOF + + +fi +rm -f conftest* + +if test "$have_fgetpos" = yes ; then + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "[^A-Za-z0-9_]fgetpos([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then + : +else + + { echo "$as_me:$LINENO: result: adding prototype for fgetpos" >&5 +echo "${ECHO_T}adding prototype for fgetpos" >&6; } + cat >>confdefs.h <<\_ACEOF +#define NEED_PROTO_FGETPOS 1 +_ACEOF + + +fi +rm -f conftest* + +fi +if test "$have_truncate" = yes ; then + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "[^A-Za-z0-9_]truncate([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then + : +else + + { echo "$as_me:$LINENO: result: adding prototype for truncate" >&5 +echo "${ECHO_T}adding prototype for truncate" >&6; } + cat >>confdefs.h <<\_ACEOF +#define NEED_PROTO_TRUNCATE 1 +_ACEOF + + +fi +rm -f conftest* + +fi +if test "$have_ftruncate" = yes ; then + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "[^A-Za-z0-9_]ftruncate([ ]+[A-Za-z0-9_]*)?[ ]*\(" >/dev/null 2>&1; then + : +else + + { echo "$as_me:$LINENO: result: adding prototype for ftruncate" >&5 +echo "${ECHO_T}adding prototype for ftruncate" >&6; } + cat >>confdefs.h <<\_ACEOF +#define NEED_PROTO_FTRUNCATE 1 +_ACEOF + + +fi +rm -f conftest* + +fi + + +# Output files +# ------------ + + +rm -f Makefile +if test "$enable_maintainer_mode" = yes ; then + { echo "$as_me:$LINENO: result: creating maintainer Makefile" >&5 +echo "${ECHO_T}creating maintainer Makefile" >&6; } + ln -s maintainer.mak Makefile + makefile_out= +else + makefile_out=Makefile +fi +ac_config_files="$ac_config_files $makefile_out" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. 
+ +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 +echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + *) $as_unset $ac_var ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + test "x$cache_file" != "x/dev/null" && + { echo "$as_me:$LINENO: updating cache $cache_file" >&5 +echo "$as_me: updating cache $cache_file" >&6;} + cat confcache >$cache_file + else + { echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5 +echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. 
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: ${CONFIG_STATUS=./config.status} +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + + + +# PATH needs CR +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +as_nl=' +' +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + { (exit 1); exit 1; } +fi + +# Work around bugs in pre-3.0 UWIN ksh. +for as_var in ENV MAIL MAILPATH +do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. 
+for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# CDPATH. +$as_unset CDPATH + + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line after each line using $LINENO; the second 'sed' + # does the real work. The second script uses 'N' to pair each + # line-number line with the line containing $LINENO, and appends + # trailing '-' during substitution so that $LINENO is not a special + # case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # scripts with optimization help from Paolo Bonzini. Blame Lee + # E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in +-n*) + case `echo 'x\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + *) ECHO_C='\c';; + esac;; +*) + ECHO_N='-n';; +esac + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir +fi +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 + +# Save the log message, to keep $[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by $as_me, which was +generated by GNU Autoconf 2.61. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTIONS] [FILE]... 
+ + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +ac_cs_version="\\ +config.status +configured by $0, generated by GNU Autoconf 2.61, + with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" + +Copyright (C) 2006 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If no file are specified by the user, then we need to provide default +# value. By we need to know if files were specified by the user. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. 
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + echo "$ac_cs_version"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + CONFIG_FILES="$CONFIG_FILES $ac_optarg" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + { echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) { echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +if \$ac_cs_recheck; then + echo "running CONFIG_SHELL=$SHELL $SHELL $0 "$ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 + CONFIG_SHELL=$SHELL + export CONFIG_SHELL + exec $SHELL "$0"$ac_configure_args \$ac_configure_extra_args --no-create --no-recursion +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF + +# Handling of arguments. 
+for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; + "$makefile_out") CONFIG_FILES="$CONFIG_FILES $makefile_out" ;; + + *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= + trap 'exit_status=$? + { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status +' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || +{ + echo "$me: cannot create a temporary directory in ." >&2 + { (exit 1); exit 1; } +} + +# +# Set up the sed scripts for CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. 
+# This happens for instance when ./config.status config.h +if test -n "$CONFIG_FILES"; then + +_ACEOF + + + +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + cat >conf$$subs.sed <<_ACEOF +SHELL!$SHELL$ac_delim +PATH_SEPARATOR!$PATH_SEPARATOR$ac_delim +PACKAGE_NAME!$PACKAGE_NAME$ac_delim +PACKAGE_TARNAME!$PACKAGE_TARNAME$ac_delim +PACKAGE_VERSION!$PACKAGE_VERSION$ac_delim +PACKAGE_STRING!$PACKAGE_STRING$ac_delim +PACKAGE_BUGREPORT!$PACKAGE_BUGREPORT$ac_delim +exec_prefix!$exec_prefix$ac_delim +prefix!$prefix$ac_delim +program_transform_name!$program_transform_name$ac_delim +bindir!$bindir$ac_delim +sbindir!$sbindir$ac_delim +libexecdir!$libexecdir$ac_delim +datarootdir!$datarootdir$ac_delim +datadir!$datadir$ac_delim +sysconfdir!$sysconfdir$ac_delim +sharedstatedir!$sharedstatedir$ac_delim +localstatedir!$localstatedir$ac_delim +includedir!$includedir$ac_delim +oldincludedir!$oldincludedir$ac_delim +docdir!$docdir$ac_delim +infodir!$infodir$ac_delim +htmldir!$htmldir$ac_delim +dvidir!$dvidir$ac_delim +pdfdir!$pdfdir$ac_delim +psdir!$psdir$ac_delim +libdir!$libdir$ac_delim +localedir!$localedir$ac_delim +mandir!$mandir$ac_delim +DEFS!$DEFS$ac_delim +ECHO_C!$ECHO_C$ac_delim +ECHO_N!$ECHO_N$ac_delim +ECHO_T!$ECHO_T$ac_delim +LIBS!$LIBS$ac_delim +build_alias!$build_alias$ac_delim +host_alias!$host_alias$ac_delim +target_alias!$target_alias$ac_delim +install_targets!$install_targets$ac_delim +CC!$CC$ac_delim +CFLAGS!$CFLAGS$ac_delim +LDFLAGS!$LDFLAGS$ac_delim +CPPFLAGS!$CPPFLAGS$ac_delim +ac_ct_CC!$ac_ct_CC$ac_delim +EXEEXT!$EXEEXT$ac_delim +OBJEXT!$OBJEXT$ac_delim +LN_S!$LN_S$ac_delim +STRIP!$STRIP$ac_delim +sort_found!$sort_found$ac_delim +CPP!$CPP$ac_delim +GREP!$GREP$ac_delim +EGREP!$EGREP$ac_delim +LIBOBJS!$LIBOBJS$ac_delim +LTLIBOBJS!$LTLIBOBJS$ac_delim +_ACEOF + + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 53; then + break + elif $ac_last_try; then + { { echo "$as_me:$LINENO: error: could not make 
$CONFIG_STATUS" >&5 +echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` +if test -n "$ac_eof"; then + ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` + ac_eof=`expr $ac_eof + 1` +fi + +cat >>$CONFIG_STATUS <<_ACEOF +cat >"\$tmp/subs-1.sed" <<\CEOF$ac_eof +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b end +_ACEOF +sed ' +s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g +s/^/s,@/; s/!/@,|#_!!_#|/ +:n +t n +s/'"$ac_delim"'$/,g/; t +s/$/\\/; p +N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n +' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF +:end +s/|#_!!_#|//g +CEOF$ac_eof +_ACEOF + + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/ +s/:*\${srcdir}:*/:/ +s/:*@srcdir@:*/:/ +s/^\([^=]*=[ ]*\):*/\1/ +s/:*$// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF +fi # test -n "$CONFIG_FILES" + + +for ac_tag in :F $CONFIG_FILES :H $CONFIG_HEADERS +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) { { echo "$as_me:$LINENO: error: Invalid tag $ac_tag." >&5 +echo "$as_me: error: Invalid tag $ac_tag." >&2;} + { (exit 1); exit 1; }; };; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). 
The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + { { echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5 +echo "$as_me: error: cannot find input file: $ac_f" >&2;} + { (exit 1); exit 1; }; };; + esac + ac_file_inputs="$ac_file_inputs $ac_f" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input="Generated from "`IFS=: + echo $* | sed 's|^[^:]*/||;s|:[^:]*/|, |g'`" by configure." + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + fi + + case $ac_tag in + *:-:* | *:-) cat >"$tmp/stdin";; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { as_dir="$ac_dir" + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 
2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 +echo "$as_me: error: cannot create directory $as_dir" >&2;} + { (exit 1); exit 1; }; }; } + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= + +case `sed -n '/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p +' $ac_file_inputs` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF + sed "$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s&@configure_input@&$configure_input&;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +$ac_datarootdir_hack +" $ac_file_inputs | sed -f "$tmp/subs-1.sed" >$tmp/out + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } && + { echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&5 +echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." 
>&2;} + + rm -f "$tmp/stdin" + case $ac_file in + -) cat "$tmp/out"; rm -f "$tmp/out";; + *) rm -f "$ac_file"; mv "$tmp/out" $ac_file;; + esac + ;; + :H) + # + # CONFIG_HEADER + # +_ACEOF + +# Transform confdefs.h into a sed script `conftest.defines', that +# substitutes the proper values into config.h.in to produce config.h. +rm -f conftest.defines conftest.tail +# First, append a space to every undef/define line, to ease matching. +echo 's/$/ /' >conftest.defines +# Then, protect against being on the right side of a sed subst, or in +# an unquoted here document, in config.status. If some macros were +# called several times there might be several #defines for the same +# symbol, which is useless. But do not sort them, since the last +# AC_DEFINE must be honored. +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +# These sed commands are passed to sed as "A NAME B PARAMS C VALUE D", where +# NAME is the cpp macro being defined, VALUE is the value it is being given. +# PARAMS is the parameter list in the macro definition--in most cases, it's +# just an empty string. +ac_dA='s,^\\([ #]*\\)[^ ]*\\([ ]*' +ac_dB='\\)[ (].*,\\1define\\2' +ac_dC=' ' +ac_dD=' ,' + +uniq confdefs.h | + sed -n ' + t rset + :rset + s/^[ ]*#[ ]*define[ ][ ]*// + t ok + d + :ok + s/[\\&,]/\\&/g + s/^\('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/ '"$ac_dA"'\1'"$ac_dB"'\2'"${ac_dC}"'\3'"$ac_dD"'/p + s/^\('"$ac_word_re"'\)[ ]*\(.*\)/'"$ac_dA"'\1'"$ac_dB$ac_dC"'\2'"$ac_dD"'/p + ' >>conftest.defines + +# Remove the space that was appended to ease matching. +# Then replace #undef with comments. This is necessary, for +# example, in the case of _POSIX_SOURCE, which is predefined and required +# on some systems where configure will not decide to define it. +# (The regexp can be short, since the line contains either #define or #undef.) 
+echo 's/ $// +s,^[ #]*u.*,/* & */,' >>conftest.defines + +# Break up conftest.defines: +ac_max_sed_lines=50 + +# First sed command is: sed -f defines.sed $ac_file_inputs >"$tmp/out1" +# Second one is: sed -f defines.sed "$tmp/out1" >"$tmp/out2" +# Third one will be: sed -f defines.sed "$tmp/out2" >"$tmp/out1" +# et cetera. +ac_in='$ac_file_inputs' +ac_out='"$tmp/out1"' +ac_nxt='"$tmp/out2"' + +while : +do + # Write a here document: + cat >>$CONFIG_STATUS <<_ACEOF + # First, check the format of the line: + cat >"\$tmp/defines.sed" <<\\CEOF +/^[ ]*#[ ]*undef[ ][ ]*$ac_word_re[ ]*\$/b def +/^[ ]*#[ ]*define[ ][ ]*$ac_word_re[( ]/b def +b +:def +_ACEOF + sed ${ac_max_sed_lines}q conftest.defines >>$CONFIG_STATUS + echo 'CEOF + sed -f "$tmp/defines.sed"' "$ac_in >$ac_out" >>$CONFIG_STATUS + ac_in=$ac_out; ac_out=$ac_nxt; ac_nxt=$ac_in + sed 1,${ac_max_sed_lines}d conftest.defines >conftest.tail + grep . conftest.tail >/dev/null || break + rm -f conftest.defines + mv conftest.tail conftest.defines +done +rm -f conftest.defines conftest.tail + +echo "ac_result=$ac_in" >>$CONFIG_STATUS +cat >>$CONFIG_STATUS <<\_ACEOF + if test x"$ac_file" != x-; then + echo "/* $configure_input */" >"$tmp/config.h" + cat "$ac_result" >>"$tmp/config.h" + if diff $ac_file "$tmp/config.h" >/dev/null 2>&1; then + { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 +echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f $ac_file + mv "$tmp/config.h" $ac_file + fi + else + echo "/* $configure_input */" + cat "$ac_result" + fi + rm -f "$tmp/out1" "$tmp/out2" + ;; + + + esac + +done # for ac_tag + + +{ (exit 0); exit 0; } +_ACEOF +chmod +x $CONFIG_STATUS +ac_clean_files=$ac_clean_files_save + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. 
So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || { (exit 1); exit 1; } +fi + + +# vim:ts=4:sw=4: diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..7fc81d4 --- /dev/null +++ b/configure.ac @@ -0,0 +1,532 @@ +# $Id$ + +# Copyright (c) 2009, Darren Hiebert +# +# This source code is released for free distribution under the terms +# of the GNU General Public License. + +# Process this file with autoconf to produce a configure script. 
+ +AC_PREREQ([2.61]) +AC_INIT([ctags.h]) +AC_CONFIG_HEADERS([config.h]) + +AH_TEMPLATE([PACKAGE], [Package name.]) +AH_TEMPLATE([VERSION], [Package version.]) +AH_TEMPLATE([clock_t], + [Define to the appropriate type if <time.h> does not define this.]) +AH_TEMPLATE([fpos_t], + [Define to long if <stdio.h> does not define this.]) +AH_TEMPLATE([L_tmpnam], + [Define to the appropriate size for tmpnam() if <stdio.h> does not define + this.]) +AH_TEMPLATE([HAVE_STAT_ST_INO], + [Define this macro if the field "st_ino" exists in struct stat in + <sys/stat.h>.]) +AH_TEMPLATE([remove], + [Define remove to unlink if you have unlink(), but not remove().]) +AH_TEMPLATE([SEEK_SET], + [Define this value used by fseek() appropriately if <stdio.h> + (or <unistd.h> on SunOS 4.1.x) does not define them.]) +AH_TEMPLATE([INT_MAX], + [Define as the maximum integer on your system if not defined in <limits.h>.]) +AH_TEMPLATE([CUSTOM_CONFIGURATION_FILE], + [You can define this label to be a string containing the name of a + site-specific configuration file containing site-wide default options. The + files /etc/ctags.conf and /usr/local/etc/ctags.conf are already checked, + so only define one here if you need a file somewhere else.]) +AH_TEMPLATE([MACROS_USE_PATTERNS], + [Define this label if you want macro tags (defined labels) to use patterns + in the EX command by default (original ctags behavior is to use line + numbers).]) +AH_VERBATIM([DEFAULT_FILE_FORMAT], [ +/* Define this as desired. + * 1: Original ctags format + * 2: Extended ctags format with extension flags in EX-style comment. + */ +#define DEFAULT_FILE_FORMAT 2 +]) +AH_TEMPLATE([SYS_INTERPRETER], + [Define this label if your system supports starting scripts with a line of + the form "#! 
/bin/sh" to select the interpreter to use for the script.]) +AH_TEMPLATE([CASE_INSENSITIVE_FILENAMES], + [Define this label if your system uses case-insensitive file names.]) +AH_VERBATIM([EXTERNAL_SORT], [ +/* Define this label to use the system sort utility (which is probably more +* efficient) over the internal sorting algorithm. +*/ +#ifndef INTERNAL_SORT +# undef EXTERNAL_SORT +#endif +]) +AH_TEMPLATE([TMPDIR], + [If you wish to change the directory in which temporary files are stored, + define this label to the directory desired.]) +AH_TEMPLATE([REGCOMP_BROKEN], + [Define this label if regcomp() is broken.]) +AH_TEMPLATE([CHECK_REGCOMP], + [Define this label if you wish to check the regcomp() function at run time + for correct behavior. This function is currently broken on Cygwin.]) +AH_TEMPLATE([__USE_FIXED_PROTOTYPES__], + [This corrects the problem of missing prototypes for certain functions + in some GNU installations (e.g. SunOS 4.1.x).]) +AH_TEMPLATE([NON_CONST_PUTENV_PROTOTYPE], + [Define this if you have a prototype for putenv() in <stdlib.h>, but it + doesn't declare its argument as "const char *".]) +AH_TEMPLATE([NEED_PROTO_REMOVE], + [If you receive error or warning messages indicating that you are missing + a prototype for, or a type mismatch using, the following function, define + this label and remake.]) +AH_TEMPLATE([NEED_PROTO_UNLINK], + [If you receive error or warning messages indicating that you are missing + a prototype for, or a type mismatch using, the following function, define + this label and remake.]) +AH_TEMPLATE([NEED_PROTO_MALLOC], + [If you receive error or warning messages indicating that you are missing + a prototype for, or a type mismatch using, the following function, define + this label and remake.]) +AH_TEMPLATE([NEED_PROTO_GETENV], + [If you receive error or warning messages indicating that you are missing + a prototype for, or a type mismatch using, the following function, define + this label and remake.]) 
+AH_TEMPLATE([NEED_PROTO_FGETPOS], + [If you receive error or warning messages indicating that you are missing + a prototype for, or a type mismatch using, the following function, define + this label and remake.]) +AH_TEMPLATE([NEED_PROTO_STAT], + [If you receive error or warning messages indicating that you are missing + a prototype for, or a type mismatch using, the following function, define + this label and remake.]) +AH_TEMPLATE([NEED_PROTO_LSTAT], + [If you receive error or warning messages indicating that you are missing + a prototype for, or a type mismatch using, the following function, define + this label and remake.]) +AH_TEMPLATE([NEED_PROTO_TRUNCATE], + [If you receive error or warning messages indicating that you are missing + a prototype for, or a type mismatch using, the following function, define + this label and remake.]) +AH_TEMPLATE([NEED_PROTO_FTRUNCATE], + [If you receive error or warning messages indicating that you are missing + a prototype for, or a type mismatch using, the following function, define + this label and remake.]) + + + +# Report system info +# ------------------ +program_name=[`grep 'PROGRAM_NAME *"' ctags.h | sed -e 's/.*"\([^"]*\)".*/\1/'`] +program_version=[`grep 'PROGRAM_VERSION *"' ctags.h | sed -e 's/.*"\([^"]*\)".*/\1/'`] +echo "$program_name, version $program_version" +uname -mrsv 2>/dev/null + +# Define convenience macros +# ------------------------- +# CHECK_HEADER_DEFINE(LABEL, HEADER [,ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND ] ]) +AC_DEFUN([CHECK_HEADER_DEFINE], [ + AC_MSG_CHECKING([if $1 is defined in $2]) + AC_EGREP_CPP(yes, +[#include <$2> +#ifdef $1 + yes +#endif +], [ + AC_MSG_RESULT(yes) + [$3] +], [ + AC_MSG_RESULT(no) + [$4] +]) ]) + +# Checks for configuration options +# -------------------------------- + +AC_ARG_WITH(posix-regex, +[ --with-posix-regex use Posix regex interface, if available]) + +AC_ARG_WITH(readlib, +[ --with-readlib include readtags library object during install]) + +# 
AC_ARG_WITH(perl-regex, +# [ --with-perl-regex use Perl pcre interface, if available]) + +AC_ARG_ENABLE(etags, +[ --enable-etags enable the installation of links for etags]) + +AC_ARG_ENABLE(extended-format, +[ --disable-extended-format + disable extension flags; use original ctags file + format only], + AC_DEFINE(DEFAULT_FILE_FORMAT, 1), AC_DEFINE(DEFAULT_FILE_FORMAT, 2)) + +AC_ARG_ENABLE(external-sort, +[ --disable-external-sort use internal sort algorithm instead of sort program]) + +AC_ARG_ENABLE(custom-config, +[ --enable-custom-config=FILE + enable custom config file for site-wide defaults]) + +AC_ARG_ENABLE(macro-patterns, +[ --enable-macro-patterns use patterns as default method to locate macros + instead of line numbers]) + +AC_ARG_ENABLE(maintainer-mode, +[ --enable-maintainer-mode + use maintainer makefile]) + +AC_ARG_ENABLE(shell-globbing, +[ --enable-shell-globbing=DIR + does shell expand wildcards (yes|no)? [yes]]) + +AC_ARG_ENABLE(tmpdir, +[ --enable-tmpdir=DIR default directory for temporary files [ARG=/tmp]], + tmpdir_specified=yes) + + +# Process configuration options +# ----------------------------- + +if test "$enable_maintainer_mode" = yes ; then + AC_MSG_RESULT(enabling maintainer mode) +fi + +install_targets="install-ctags" +AC_MSG_CHECKING(whether to install link to etags) +if test yes = "$enable_etags"; then + AC_MSG_RESULT(yes) + install_targets="$install_targets install-etags" +else + AC_MSG_RESULT(no) +fi +AC_MSG_CHECKING(whether to install readtags object file) +if test yes = "$with_readlib"; then + AC_MSG_RESULT(yes) + install_targets="$install_targets install-lib" +else + AC_MSG_RESULT(no) +fi +AC_SUBST(install_targets) + +if test "$enable_custom_config" = no -o "$enable_custom_config" = yes ; then + AC_MSG_RESULT(no name supplied for custom configuration file) +elif test -n "$enable_custom_config" ; then + AC_DEFINE_UNQUOTED(CUSTOM_CONFIGURATION_FILE, "$enable_custom_config") + AC_MSG_RESULT($enable_custom_config will be used as 
custom configuration file) +fi + +if test "$enable_macro_patterns" = yes ; then + AC_DEFINE(MACROS_USE_PATTERNS) + AC_MSG_RESULT(tag file will use patterns for macros by default) +fi + +# Checks for programs +# ------------------- + +AC_PROG_CC + +case `uname` in + HP-UX) + AC_MSG_CHECKING(HP-UX native compiler) + if test "$CC" = "cc"; then + AC_MSG_RESULT(yes; adding compiler options for ANSI support) + CFLAGS="$CFLAGS -Aa -D_HPUX_SOURCE" + else + AC_MSG_RESULT(no) + fi + ;; + SunOS) + if uname -r | grep '5\.' >/dev/null 2>&1; then + AC_MSG_CHECKING(Solaris native compiler) + if test "$CC" = "cc" -a "`which cc`" = "/usr/ucb/cc"; then + AC_MSG_RESULT(yes; adding compiler option for ANSI support) + CC="$CC -Xa" + else + AC_MSG_RESULT(no) + fi + fi + ;; +esac + +AC_PROG_LN_S +AC_CHECK_PROG(STRIP, strip, strip, :) + + +# Checks for operating environment +# -------------------------------- + +# Check for temporary directory. The first non-empty value among +# --enable-tmpdir, $TMPDIR, $TMP and $TEMP is used; otherwise c:/ if that +# directory exists, else /tmp. The chosen directory must already exist. +AC_MSG_CHECKING(directory to use for temporary files) +if test -n "$enable_tmpdir"; then + tmpdir="$enable_tmpdir" +elif test -n "$TMPDIR"; then + tmpdir="$TMPDIR" +elif test -n "$TMP"; then + tmpdir="$TMP" +elif test -n "$TEMP"; then + tmpdir="$TEMP" +elif test -d "c:/"; then + tmpdir="c:/" +else + tmpdir="/tmp" +fi +if test -d "$tmpdir" ; then + AC_MSG_RESULT($tmpdir) + AC_DEFINE_UNQUOTED(TMPDIR, "$tmpdir") +else + AC_MSG_ERROR($tmpdir does not exist) +fi + +# Check whether system supports #! 
scripts +AC_SYS_INTERPRETER +if test yes = "$interpval"; then + AC_DEFINE(SYS_INTERPRETER) +fi + +# Test for case-insensitive filenames +AC_MSG_CHECKING(for case-insensitive filenames) +touch conftest.cif +if test -f CONFTEST.CIF; then + AC_MSG_RESULT(yes) + AC_DEFINE(CASE_INSENSITIVE_FILENAMES) +else + AC_MSG_RESULT(no) +fi +rm -f conftest.cif + +AC_MSG_CHECKING(selected sort method) +if test no = "$enable_external_sort"; then + AC_MSG_RESULT(simple internal algorithm) +else + AC_MSG_RESULT(external sort utility) + enable_external_sort=no + AC_CHECK_PROG(sort_found, sort, yes, no) + if test "$sort_found" = yes ; then + AC_MSG_CHECKING(if sort accepts our command line) + touch ${tmpdir}/sort.test + sort -u -f -o ${tmpdir}/sort.test ${tmpdir}/sort.test 1>/dev/null 2>&1 + if test $? -ne 0 ; then + AC_MSG_RESULT(no) + else + AC_MSG_RESULT(yes) + AC_DEFINE(EXTERNAL_SORT) + enable_external_sort=yes + fi + rm -f ${tmpdir}/sort.test + fi +fi +if test "$enable_external_sort" != yes ; then + AC_MSG_RESULT(using internal sort algorithm as fallback) +fi + + +# Checks for header files +# ----------------------- + +AC_CHECK_HEADERS_ONCE([dirent.h fcntl.h fnmatch.h stat.h stdlib.h string.h]) +AC_CHECK_HEADERS_ONCE([time.h types.h unistd.h]) +AC_CHECK_HEADERS_ONCE([sys/dir.h sys/stat.h sys/times.h sys/types.h]) + + +# Checks for header file macros +# ----------------------------- + +CHECK_HEADER_DEFINE(L_tmpnam, [stdio.h],, AC_DEFINE(L_tmpnam, 20)) + +CHECK_HEADER_DEFINE(INT_MAX, [limits.h],, + CHECK_HEADER_DEFINE(MAXINT, [limits.h], + AC_DEFINE(INT_MAX, MAXINT), AC_DEFINE(INT_MAX, 32767))) + + +# Checks for typedefs +# ------------------- + +AC_TYPE_SIZE_T +AC_TYPE_OFF_T + +AC_MSG_CHECKING(for fpos_t) +AC_EGREP_HEADER(fpos_t, stdio.h, AC_MSG_RESULT(yes), +[ + AC_MSG_RESULT(no) + AC_DEFINE(fpos_t, long) +]) + +AC_MSG_CHECKING(for clock_t) +AC_EGREP_HEADER(clock_t, time.h, AC_MSG_RESULT(yes), +[ + AC_MSG_RESULT(no) + AC_DEFINE(clock_t, long) +]) + + +# Checks for compiler 
characteristics +# ----------------------------------- + +# AC_CYGWIN +# AC_MINGW32 +AC_C_CONST +AC_OBJEXT +AC_EXEEXT + +AC_MSG_CHECKING(if struct stat contains st_ino) +AC_TRY_COMPILE([#include <sys/stat.h>], [ + struct stat st; + stat(".", &st); + if (st.st_ino > 0) + exit(0); +], have_st_ino=yes, have_st_ino=no) +AC_MSG_RESULT($have_st_ino) +if test yes = "$have_st_ino"; then + AC_DEFINE(HAVE_STAT_ST_INO) +fi + + +# Checks for library functions +# ---------------------------- + +AC_CHECK_FUNCS(fnmatch) +AC_CHECK_FUNCS(strstr) +AC_CHECK_FUNCS(strcasecmp stricmp, break) +AC_CHECK_FUNCS(strncasecmp strnicmp, break) +AC_CHECK_FUNCS(fgetpos, have_fgetpos=yes) + +# SEEK_SET should be in stdio.h, but may be in unistd.h on SunOS 4.1.x +if test "$have_fgetpos" != yes ; then + CHECK_HEADER_DEFINE(SEEK_SET, stdio.h,, + CHECK_HEADER_DEFINE(SEEK_SET, unistd.h,, + AC_DEFINE(SEEK_SET, 0))) +fi + +AC_CHECK_FUNCS(mkstemp, have_mkstemp=yes) +if test "$have_mkstemp" != yes ; then + AC_CHECK_FUNCS(tempnam, have_tempnam=yes) +fi +if test "$have_mkstemp" != yes -a "$have_tempnam" != yes; then + AC_CHECK_FUNCS(chmod) + if test "$tmpdir_specified" = yes ; then + AC_MSG_RESULT(use of tmpnam overrides temporary directory selection) + fi +fi + +AC_CHECK_FUNCS(opendir findfirst _findfirst, break) +AC_CHECK_FUNCS(strerror) +AC_CHECK_FUNCS(clock times, break) +AC_CHECK_FUNCS(remove, have_remove=yes, + CHECK_HEADER_DEFINE(remove, unistd.h,, AC_DEFINE(remove, unlink))) + +AC_CHECK_FUNCS(truncate, have_truncate=yes) +# === Cannot nest AC_CHECK_FUNCS() calls +if test "$have_truncate" != yes ; then + AC_CHECK_FUNCS(ftruncate, have_ftruncate=yes) + if test "$have_ftruncate" != yes ; then + AC_CHECK_FUNCS(chsize) + fi +fi + +AC_CHECK_FUNCS(setenv, have_setenv=yes) +# === Cannot nest AC_CHECK_FUNCS() calls +if test "$have_setenv" != yes ; then + AC_CHECK_FUNCS(putenv, have_putenv=yes) + if test "$have_putenv" = yes ; then + AC_EGREP_HEADER(putenv, stdlib.h, have_putenv_prototype=yes) + if test 
"$have_putenv_prototype" = yes ; then + AC_MSG_CHECKING(putenv prototype) + AC_EGREP_HEADER([[^A-Za-z0-9_]putenv[ ]*\(.*const.*\)[ ]*;], + stdlib.h, AC_MSG_RESULT(correct), + [ + AC_MSG_RESULT(no const) + AC_DEFINE(NON_CONST_PUTENV_PROTOTYPE) + ]) + fi + fi +fi + +# +# if test yes = "$CYGWIN"; then with_posix_regex=no; fi +if test no != "$with_posix_regex"; then + AC_CHECK_FUNCS(regcomp) + AC_MSG_CHECKING(if regcomp works) + AC_TRY_RUN([ +#include <sys/types.h> +#include <regex.h> +main() { + regex_t patbuf; + exit (regcomp (&patbuf, "/hello/", 0) != 0); +}],regcomp_works=yes,regcomp_works=no,AC_DEFINE(CHECK_REGCOMP)) + AC_MSG_RESULT($regcomp_works) + if test yes != "$regcomp_works"; then + AC_DEFINE(REGCOMP_BROKEN) + fi +fi + +# if test yes = "$with_perl_regex"; then +# AC_MSG_CHECKING(for Perl regex library) +# pcre_candidates="$with_perl_regex $HOME/local/lib* /usr*/local/lib* /usr/lib*" +# for lib in $pcre_candidates; do +# if test -f $lib/libpcreposix.so; then +# pcre_lib="-L$lib -lpcreposix" +# break +# elif test -f $lib/libpcreposix.a; then +# pcre_lib="$lib/libpcreposix.a" +# break +# fi +# done +# if test -z "$pcre_lib"; then +# AC_MSG_RESULT(not found) +# else +# AC_MSG_RESULT($lib) +# AC_DEFINE(HAVE_REGCOMP) +# LDFLAGS="$LDFLAGS $pcre_lib" +# have_regex=yes +# fi +# fi + + +# Checks for missing prototypes +# ----------------------------- +AC_CHECKING(for new missing prototypes) + +# CHECK_PROTO(FUNCTION, HEADER) +# Defines NEED_PROTO_<FUNCTION> (name upper-cased) when HEADER contains no +# text that looks like a declaration of FUNCTION. +AC_DEFUN([CHECK_PROTO], [ + AC_EGREP_HEADER([[^A-Za-z0-9_]$1([ ]+[A-Za-z0-9_]*)?[ ]*\(], + $2, + , + [ + AC_MSG_RESULT([adding prototype for $1]) + AC_DEFINE(patsubst([NEED_PROTO_NAME], [NAME], translit([$1], [[a-z]], [[A-Z]]))) + ])]) + +if test "$have_remove" = yes ; then + CHECK_PROTO(remove, stdio.h) +else + CHECK_PROTO(unlink, unistd.h) +fi +CHECK_PROTO(malloc, stdlib.h) +CHECK_PROTO(getenv, stdlib.h) +CHECK_PROTO(stat, sys/stat.h) +CHECK_PROTO(lstat, sys/stat.h) +if test "$have_fgetpos" = yes ; then + CHECK_PROTO(fgetpos, stdio.h) +fi +if test "$have_truncate" = yes ; then + CHECK_PROTO(truncate, 
unistd.h) +fi +if test "$have_ftruncate" = yes ; then + CHECK_PROTO(ftruncate, unistd.h) +fi + + +# Output files +# ------------ + + +rm -f Makefile +if test "$enable_maintainer_mode" = yes ; then + AC_MSG_RESULT(creating maintainer Makefile) + ln -s maintainer.mak Makefile + makefile_out= +else + makefile_out=Makefile +fi +AC_OUTPUT($makefile_out) + +# vim:ts=4:sw=4: diff --git a/ctags.1 b/ctags.1 new file mode 100644 index 0000000..2d89006 --- /dev/null +++ b/ctags.1 @@ -0,0 +1,1186 @@ +.TH CTAGS 1 "Version 5.8" "Darren Hiebert" "Exuberant Ctags" + + +.SH "NAME" +ctags \- Generate tag files for source code + + +.SH SYNOPSIS +.TP 6 +\fBctags\fP [\fBoptions\fP] [\fIfile(s)\fP] +.TP 6 +\fBetags\fP [\fBoptions\fP] [\fIfile(s)\fP] + + +.SH "DESCRIPTION" +The \fBctags\fP and \fBetags\fP programs (hereinafter collectively referred to +as \fBctags\fP, except where distinguished) generate an index (or "tag") file +for a variety of language objects found in \fIfile(s)\fP. +This tag file allows these items to be quickly and easily located by a text +editor or other utility. A "tag" signifies a language object for which an +index entry is available (or, alternatively, the index entry created for that +object). + +Alternatively, \fBctags\fP can generate a cross reference file which lists, in +human readable form, information about the various source objects found in a +set of language files. + +Tag index files are supported by numerous editors, which allow the user to +locate the object associated with a name appearing in a source file and jump +to the file and line which defines the name. Those known about at the time of +this release are: + +.RS 4 +\fBVi\fP(1) and its derivatives (e.g. 
Elvis, Vim, Vile, Lemmy), +\fBCRiSP\fP, +\fBEmacs\fP, +\fBFTE\fP (Folding Text Editor), +\fBJED\fP, +\fBjEdit\fP, +\fBMined\fP, +\fBNEdit\fP (Nirvana Edit), +\fBTSE\fP (The SemWare Editor), +\fBUltraEdit\fP, +\fBWorkSpace\fP, +\fBX2\fP, +\fBZeus\fP +.RE + +\fBCtags\fP is capable of generating different kinds of tags for each of many +different languages. For a complete list of supported languages, the names +by which they are recognized, and the kinds of tags which are generated for +each, see the \fB\-\-list\-languages\fP and \fB\-\-list\-kinds\fP options. + + +.SH "SOURCE FILES" + +Unless the \fB\-\-language\-force\fP option is specified, the language of each +source file is automatically selected based upon a mapping of file names to +languages. The mappings in effect for each language may be display using the +\fB\-\-list\-maps\fP option and may be changed using the \fB\-\-langmap\fP option. +On platforms which support it, if the name of a file is not mapped +to a language and the file is executable, the first line of the file is +checked to see if the file is a "#!" script for a recognized language. + +By default, all other files names are ignored. This permits running +\fBctags\fP on all files in either a single directory (e.g. "ctags *"), or on +all files in an entire source directory tree (e.g. "ctags \-R"), since only +those files whose names are mapped to languages will be scanned. + +[The reason that .h extensions are mapped to C++ files rather than C files +is because it is common to use .h extensions in C++, and no harm results in +treating them as C++ files.] + + +.SH "OPTIONS" + +Despite the wealth of available options, defaults are set so that \fBctags\fP +is most commonly executed without any options (e.g. "ctags *", or "ctags \-R"), +which will create a tag file in the current directory for all recognized +source files. The options described below are provided merely to allow custom +tailoring to meet special needs. 
+ +Note that spaces separating the single-letter options from their parameters +are optional. + +Note also that the boolean parameters to the long form options (those +beginning with "\-\-" and that take a "\fI[=yes\fP|\fIno]\fP" parameter) may +be omitted, in which case "\fB=\fP\fIyes\fP" is implied. (e.g. \fB\-\-sort\fP +is equivalent to \fB\-\-sort\fP=\fIyes\fP). Note further that "=\fI1\fP" and +"=\fIon\fP" are considered synonyms for "=\fIyes\fP", and that "=\fI0\fP" +and "=\fIoff\fP" are considered synonyms for "=\fIno\fP". + +Some options are either ignored or useful only when used while running in +etags mode (see \fB\-e\fP option). Such options will be noted. + +Most options may appear anywhere on the command line, affecting only those +files which follow the option. A few options, however, must appear before the +first file name and will be noted as such. + +Options taking language names will accept those names in either upper or lower +case. See the \fB\-\-list\-languages\fP option for a complete list of the +built-in language names. + +.TP 5 +.B \-a +Equivalent to \fB\-\-append\fP. + +.TP 5 +.B \-B +Use backward searching patterns (e.g. ?pattern?). [Ignored in etags mode] + +.TP 5 +.B \-e +Enable etags mode, which will create a tag file for use with the Emacs editor. +Alternatively, if \fBctags\fP is invoked by a name containing the string +"etags" (either by renaming, or creating a link to, the executable), etags +mode will be enabled. This option must appear before the first file name. + +.TP 5 +.BI \-f " tagfile" +Use the name specified by \fItagfile\fP for the tag file (default is "tags", +or "TAGS" when running in etags mode). If \fItagfile\fP is specified as +"\-", then the tag file is written to standard output instead. \fBCtags\fP +will stubbornly refuse to take orders if \fItagfile\fP exists and its first +line contains something other than a valid tags line. 
This will save your neck +if you mistakenly type "ctags \-f *.c", which would otherwise overwrite your +first C file with the tags generated by the rest! It will also refuse to +accept a multi-character file name which begins with a '\-' (dash) character, +since this most likely means that you left out the tag file name and this +option tried to grab the next option as the file name. If you really want to +name your output tag file "\-ugly", specify it as "./\-ugly". This option must +appear before the first file name. If this option is specified more than once, +only the last will apply. + +.TP 5 +.B \-F +Use forward searching patterns (e.g. /pattern/) (default). +[Ignored in etags mode] + +.TP 5 +.BI \-h " list" +Specifies a list of file extensions, separated by periods, which are to be +interpreted as include (or header) files. To indicate files having no +extension, use a period not followed by a non-period character (e.g. ".", +"..x", ".x."). This option only affects how the scoping of a particular kinds +of tags is interpreted (i.e. whether or not they are considered as globally +visible or visible only within the file in which they are defined); it does +not map the extension to any particular language. Any tag which is located in +a non-include file and cannot be seen (e.g. linked to) from another file is +considered to have file-limited (e.g. static) scope. No kind of tag appearing +in an include file will be considered to have file-limited scope. If the first +character in the list is a plus sign, then the extensions in the list will be +appended to the current list; otherwise, the list will replace the current +list. See, also, the \fB\-\-file\-scope\fP option. The default list is +".h.H.hh.hpp.hxx.h++.inc.def". To restore the default list, specify \fB\-h\fP +\fIdefault\fP. 
Note that if an extension supplied to this option is not +already mapped to a particular language (see \fBSOURCE FILES\fP, above), you +will also need to use either the \fB\-\-langmap\fP or \fB\-\-language\-force\fP +option. + +.TP 5 +.BI \-I " identifier\-list" +Specifies a list of identifiers which are to be specially handled while +parsing C and C++ source files. This option is specifically provided to handle +special cases arising through the use of preprocessor macros. When the +identifiers listed are simple identifiers, these identifiers will be ignored +during parsing of the source files. If an identifier is suffixed with a '+' +character, \fBctags\fP will also ignore any parenthesis-enclosed argument list +which may immediately follow the identifier in the source files. If two +identifiers are separated with the '=' character, the first identifier is +replaced by the second identifier for parsing purposes. The list of +identifiers may be supplied directly on the command line or read in from a +separate file. If the first character of \fIidentifier\-list\fP is '@', '.' or +a pathname separator ('/' or '\\'), or the first two characters specify a +drive letter (e.g. "C:"), the parameter \fIidentifier\-list\fP will be +interpreted as a filename from which to read a list of identifiers, one per +input line. Otherwise, \fIidentifier\-list\fP is a list of identifiers (or +identifier pairs) to be specially handled, each delimited by either a comma +or by white space (in which case the list should be quoted to keep the entire +list as one command line argument). Multiple \fB\-I\fP options may be supplied. +To clear the list of ignore identifiers, supply a single dash ("\-") for +\fIidentifier\-list\fP. + +This feature is useful when preprocessor macros are used in such a way that +they cause syntactic confusion due to their presence. 
Indeed, this is the best +way of working around a number of problems caused by the presence of +syntax-busting macros in source files (see \fBCAVEATS\fP, below). Some +examples will illustrate this point. + +.RS 8 +int foo ARGDECL4(void *, ptr, long int, nbytes) +.RE + +.IP +In the above example, the macro "ARGDECL4" would be mistakenly interpreted to +be the name of the function instead of the correct name of "foo". Specifying +\fB\-I\fP \fIARGDECL4\fP results in the correct behavior. + +.RS 8 +/* creates an RCS version string in module */ +.br +MODULE_VERSION("$Revision: 690 $") +.RE + +.IP +In the above example the macro invocation looks too much like a function +definition because it is not followed by a semicolon (indeed, it could even be +followed by a global variable definition that would look much like a K&R style +function parameter declaration). In fact, this seeming function definition +could possibly even cause the rest of the file to be skipped over while trying +to complete the definition. Specifying \fB\-I\fP \fIMODULE_VERSION+\fP would +avoid such a problem. + +.RS 8 +CLASS Example { +.br + // your content here +.br +}; +.RE + +.IP +The example above uses "CLASS" as a preprocessor macro which expands to +something different for each platform. For instance CLASS may be defined as +"class __declspec(dllexport)" on Win32 platforms and simply "class" on UNIX. +Normally, the absence of the C++ keyword "class" would cause the source file +to be incorrectly parsed. Correct behavior can be restored by specifying +\fB\-I\fP \fICLASS=class\fP. + +.TP 5 +.BI \-L " file" +Read from \fIfile\fP a list of file names for which tags should be generated. +If \fIfile\fP is specified as "\-", then file names are read from standard +input. File names read using this option are processed following file names +appearing on the command line. Options are also accepted in this input. If +this option is specified more than once, only the last will apply. 
\fBNote:\fP +\fIfile\fP is read in line-oriented mode, where a new line is the only +delimiter and non-trailing white space is considered significant, in order +that file names containing spaces may be supplied (however, trailing white +space is stripped from lines); this can affect how options are parsed if +included in the input. + +.TP 5 +.B \-n +Equivalent to \fB\-\-excmd\fP=\fInumber\fP. + +.TP 5 +.B \-N +Equivalent to \fB\-\-excmd\fP=\fIpattern\fP. + +.TP 5 +.BI \-o " tagfile" +Equivalent to \fB\-f\fP \fItagfile\fP. + +.TP 5 +.B \-R +Equivalent to \fB\-\-recurse\fP. + +.TP 5 +.B \-u +Equivalent to \fB\-\-sort\fP=\fIno\fP (i.e. "unsorted"). + +.TP 5 +.B \-V +Equivalent to \fB\-\-verbose\fP. + +.TP 5 +.B \-w +This option is silently ignored for backward-compatibility with the ctags +of SVR4 Unix. + +.TP 5 +.B \-x +Print a tabular, human-readable cross reference (xref) file to standard output +instead of generating a tag file. The information contained in the output +includes: the tag name; the kind of tag; the line number, file name, and +source line (with extra white space condensed) of the file which defines the +tag. No tag file is written and all options affecting tag file output will be +ignored. Example applications for this feature are generating a listing of all +functions located in a source file (e.g. \fBctags \-x \-\-c\-kinds\fP=\fIf\fP +\fIfile\fP), or generating a list of all externally visible global variables +located in a source file (e.g. \fBctags \-x \-\-c\-kinds\fP=\fIv\fP +\fB\-\-file\-scope\fP=\fIno file\fP). This option must appear before the first +file name. + +.TP 5 +\fB\-\-append\fP[=\fIyes\fP|\fIno\fP] +Indicates whether tags generated from the specified files should be appended +to those already present in the tag file or should replace them. This option +is off by default. This option must appear before the first file name. + +.TP 5 +\fB\-\-etags\-include\fP=\fIfile\fP +Include a reference to \fIfile\fP in the tag file. 
This option may be +specified as many times as desired. This supports Emacs' capability to use a +tag file which "includes" other tag files. [Available only in etags mode] + +.TP 5 +\fB\-\-exclude\fP=[\fIpattern\fP] +Add \fIpattern\fP to a list of excluded files and directories. This option +may be specified as many times as desired. For each file name considered by +\fBctags\fP, each \fIpattern\fP specified using this option will be compared +against both the complete path (e.g. some/path/base.ext) and the base name +(e.g. base.ext) of the file, thus allowing patterns which match a given file +name irrespective of its path, or match only a specific path. If appropriate +support is available from the runtime library of your C compiler, then +\fIpattern\fP may contain the usual shell wildcards (not regular expressions) +common on Unix (be sure to quote the option parameter to protect the wildcards +from being expanded by the shell before being passed to \fBctags\fP; also be +aware that wildcards can match the slash character, '/'). You can determine if +shell wildcards are available on your platform by examining the output of the +\fB\-\-version\fP option, which will include "+wildcards" in the compiled +feature list; otherwise, \fIpattern\fP is matched against file names using a +simple textual comparison. + +If \fIpattern\fP begins with the character '@', then the rest of the string +is interpreted as a file name from which to read exclusion patterns, one per +line. If \fIpattern\fP is empty, the list of excluded patterns is cleared. +Note that at program startup, the default exclude list contains "EIFGEN", +"SCCS", "RCS", and "CVS", which are names of directories for which it is +generally not desirable to descend while processing the \fB\-\-recurse\fP +option. + +.TP 5 +\fB\-\-excmd\fP=\fItype\fP +Determines the type of EX command used to locate tags in the source file. 
+[Ignored in etags mode] + +The valid values for \fItype\fP (either the entire word or the first letter is +accepted) are: + +.RS 5 +.TP 9 +.I number +Use only line numbers in the tag file for locating tags. This has four +advantages: +.PD 0 +.RS 9 +.TP 4 +1. +Significantly reduces the size of the resulting tag file. +.TP 4 +2. +Eliminates failures to find tags because the line defining the tag has +changed, causing the pattern match to fail (note that some editors, such as +\fBvim\fP, are able to recover in many such instances). +.TP 4 +3. +Eliminates finding identical matching, but incorrect, source lines (see +\fBBUGS\fP, below). +.TP 4 +4. +Retains separate entries in the tag file for lines which are identical in +content. In \fIpattern\fP mode, duplicate entries are dropped because the +search patterns they generate are identical, making the duplicate entries +useless. +.RE +.PD 1 + +.IP +However, this option has one significant drawback: changes to the source files +can cause the line numbers recorded in the tag file to no longer correspond +to the lines in the source file, causing jumps to some tags to miss the target +definition by one or more lines. Basically, this option is best used when the +source code to which it is applied is not subject to change. Selecting this +option type causes the following options to be ignored: \fB\-BF\fP. + +.TP 9 +.I pattern +Use only search patterns for all tags, rather than the line numbers usually +used for macro definitions. This has the advantage of not referencing obsolete +line numbers when lines have been added or removed since the tag file was +generated. + +.TP 9 +.I mixed +In this mode, patterns are generally used with a few exceptions. For C, line +numbers are used for macro definition tags. This was the default format +generated by the original \fBctags\fP and is, therefore, retained as the +default for this option. 
For Fortran, line numbers are used for common blocks +because their corresponding source lines are generally identical, making +pattern searches useless for finding all matches. +.RE + +.TP 5 +\fB\-\-extra\fP=\fI[+|\-]flags\fP +Specifies whether to include extra tag entries for certain kinds of +information. The parameter \fIflags\fP is a set of one-letter flags, each +representing one kind of extra tag entry to include in the tag file. If +\fIflags\fP is preceded by either the '+' or '\-' character, the effect of +each flag is added to, or removed from, those currently enabled; otherwise the +flags replace any current settings. The meaning of each flag is as follows: + +.PP +.RS 8 +.TP 4 +.I f +Include an entry for the base file name of every source file (e.g. +"example.c"), which addresses the first line of the file. +.TP 4 +.I q +Include an extra class-qualified tag entry for each tag which is a member +of a class (for languages for which this information is extracted; currently +C++, Eiffel, and Java). The actual form of the qualified tag depends upon the +language from which the tag was derived (using a form that is most natural for +how qualified calls are specified in the language). For C++, it is in the form +"class::member"; for Eiffel and Java, it is in the form "class.member". This +may allow easier location of a specific tag when multiple occurrences of a +tag name occur in the tag file. Note, however, that this could potentially +more than double the size of the tag file. +.RE + +.TP 5 +\fB\-\-fields\fP=\fI[+|\-]flags\fP +Specifies the available extension fields which are to be included in the +entries of the tag file (see \fBTAG FILE FORMAT\fP, below, for more +information). 
The parameter \fIflags\fP is a set of one-letter flags, each +representing one type of extension field to include, with the following +meanings (disabled by default unless indicated): + +.PP +.PD 0 +.RS 8 +.TP 4 +.I a +Access (or export) of class members +.TP 4 +.I f +File-restricted scoping [enabled] +.TP 4 +.I i +Inheritance information +.TP 4 +.I k +Kind of tag as a single letter [enabled] +.TP 4 +.I K +Kind of tag as full name +.TP 4 +.I l +Language of source file containing tag +.TP 4 +.I m +Implementation information +.TP 4 +.I n +Line number of tag definition +.TP 4 +.I s +Scope of tag definition [enabled] +.TP 4 +.I S +Signature of routine (e.g. prototype or parameter list) +.TP 4 +.I z +Include the "kind:" key in kind field +.TP 4 +.I t +Type and name of a variable or typedef as "typeref:" field [enabled] +.PD 1 +.RE + +.RS 5 +Each letter or group of letters may be preceded by either '+' to add it to the +default set, or '\-' to exclude it. In the absence of any preceding '+' or '\-' +sign, only those fields explicitly listed in \fIflags\fP will be included in +the output (i.e. overriding the default set). This option is ignored if the +option \fB\-\-format\fP=\fI1\fP has been specified. The default value +of this option is \fIfkst\fP. +.RE + +.TP 5 +\fB\-\-file\-scope\fP[=\fIyes\fP|\fIno\fP] +Indicates whether tags scoped only for a single file (i.e. tags which cannot +be seen outside of the file in which they are defined, such as "static" tags) +should be included in the output. See, also, the \fB\-h\fP option. This option +is enabled by default. + +.TP 5 +\fB\-\-filter\fP[=\fIyes\fP|\fIno\fP] +Causes \fBctags\fP to behave as a filter, reading source file names from +standard input and printing their tags to standard output on a file-by-file +basis. If \fB\-\-sort\fP is enabled, tags are sorted only within the source +file in which they are defined. 
File names are read from standard input in +line-oriented input mode (see note for \fB\-L\fP option) and only after file +names listed on the command line or from any file supplied using the \fB\-L\fP +option. When this option is enabled, the options \fB\-f\fP, \fB\-o\fP, +and \fB\-\-totals\fP are ignored. This option is quite esoteric and is disabled +by default. This option must appear before the first file name. + +.TP 5 +\fB\-\-filter\-terminator\fP=\fIstring\fP +Specifies a string to print to standard output following the tags for each +file name parsed when the \fB\-\-filter\fP option is enabled. This may permit an +application reading the output of ctags to determine when the output for each +file is finished. Note that if the file name read is a directory and +\fB\-\-recurse\fP is enabled, this string will be printed only once at the +end of all tags found by descending the directory. This string will always +be separated from the last tag line for the file by its terminating newline. +This option is quite esoteric and is empty by default. This option must appear +before the first file name. + +.TP 5 +\fB\-\-format\fP=\fIlevel\fP +Change the format of the output tag file. Currently the only valid values for +\fIlevel\fP are \fI1\fP or \fI2\fP. Level 1 specifies the original tag file +format and level 2 specifies a new extended format containing extension fields +(but in a manner which retains backward-compatibility with original +\fBvi\fP(1) implementations). The default level is 2. This option must appear +before the first file name. [Ignored in etags mode] + +.TP 5 +.B \-\-help +Prints to standard output a detailed usage description, and then exits. + +.TP 5 +\fB\-\-if0\fP[=\fIyes\fP|\fIno\fP] +Indicates a preference as to whether code within an "#if 0" branch of a +preprocessor conditional should be examined for non-macro tags (macro tags are +always included).
Because the intent of this construct is to disable code, the +default value of this option is \fIno\fP. Note that this indicates a +preference only and does not guarantee skipping code within an "#if 0" branch, +since the fall-back algorithm used to generate tags when preprocessor +conditionals are too complex follows all branches of a conditional. This +option is disabled by default. + +.TP 5 +\fB\-\-<LANG>\-kinds\fP=\fI[+|\-]kinds\fP +Specifies a list of language-specific kinds of tags (or kinds) to include in +the output file for a particular language, where \fB<LANG>\fP is +case-insensitive and is one of the built-in language names (see the +\fB\-\-list\-languages\fP option for a complete list). The parameter \fIkinds\fP +is a group of one-letter flags designating kinds of tags (particular to the +language) to either include or exclude from the output. The specific sets of +flags recognized for each language, their meanings and defaults may be listed +using the \fB\-\-list\-kinds\fP option. Each letter or group of letters may be +preceded by either '+' to add it to, or '\-' to remove it from, the default +set. In the absence of any preceding '+' or '\-' sign, only those kinds +explicitly listed in \fIkinds\fP will be included in the output (i.e. +overriding the default for the specified language). + +As an example for the C language, in order to add prototypes and external +variable declarations to the default set of tag kinds, but exclude macros, +use \fB\-\-c\-kinds\fP=\fI+px\-d\fP; to include only tags for functions, use +\fB\-\-c\-kinds\fP=\fIf\fP. + +.TP 5 +\fB\-\-langdef\fP=\fIname\fP +Defines a new user-defined language, \fIname\fP, to be parsed with regular +expressions. Once defined, \fIname\fP may be used in other options taking +language names. The typical use of this option is to first define the +language, then map file names to it using \fI\-\-langmap\fP, then specify +regular expressions using \fI\-\-regex\-<LANG>\fP to define how its tags are +found.
+ +.TP 5 +\fB\-\-langmap\fP=\fImap[,map[...]]\fP +Controls how file names are mapped to languages (see the \fB\-\-list\-maps\fP +option). Each comma-separated \fImap\fP consists of the language name (either +a built-in or user-defined language), a colon, and a list of file extensions +and/or file name patterns. A file extension is specified by preceding the +extension with a period (e.g. ".c"). A file name pattern is specified by +enclosing the pattern in parentheses (e.g. "([Mm]akefile)"). If appropriate +support is available from the runtime library of your C compiler, then the +file name pattern may contain the usual shell wildcards common on Unix (be +sure to quote the option parameter to protect the wildcards from being +expanded by the shell before being passed to \fBctags\fP). You can determine +if shell wildcards are available on your platform by examining the output of +the \fB\-\-version\fP option, which will include "+wildcards" in the compiled +feature list; otherwise, the file name patterns are matched against file names +using a simple textual comparison. When mapping a file extension, it will +first be unmapped from any other languages. + +If the first character in a map is a plus sign, then the extensions and file +name patterns in that map will be appended to the current map for that +language; otherwise, the map will replace the current map. For example, to +specify that only files with extensions of .c and .x are to be treated as C +language files, use "\fB\-\-langmap\fP=\fIc:.c.x\fP"; to also add files with +extensions of .j as Java language files, specify +"\fB\-\-langmap\fP=\fIc:.c.x,java:+.j\fP". To map makefiles (e.g. files +named either "Makefile", "makefile", or having the extension ".mak") to a +language called "make", specify "\fB\-\-langmap\fP=\fImake:([Mm]akefile).mak\fP". +To map files having no extension, specify a period not followed by a +non-period character (e.g. ".", "..x", ".x."). 
To clear the mapping for a +particular language (thus inhibiting automatic generation of tags for that +language), specify an empty extension list (e.g. +"\fB\-\-langmap\fP=\fIfortran:\fP"). To restore the default language mappings +for a particular language, supply the keyword "default" for the mapping. +To restore the default language mappings for all languages, specify +"\fB\-\-langmap\fP=\fIdefault\fP". Note that file extensions are tested before +file name patterns when inferring the language of a file. + +.TP 5 +\fB\-\-language\-force\fP=\fIlanguage\fP +By default, \fBctags\fP automatically selects the language of a source file, +ignoring those files whose language cannot be determined (see +\fBSOURCE FILES\fP, above). This option forces the specified \fIlanguage\fP +(case-insensitive; either built-in or user-defined) to be used for every +supplied file instead of automatically selecting the language based upon its +extension. In addition, the special value \fIauto\fP indicates that the +language should be automatically selected (which effectively disables this +option). + +.TP 5 +\fB\-\-languages\fP=\fI[+|\-]list\fP +Specifies the languages for which tag generation is enabled, with \fIlist\fP +containing a comma-separated list of language names (case-insensitive; either +built-in or user-defined). If the first language of \fIlist\fP is not preceded +by either a '+' or '\-', the current list will be cleared before adding or +removing the languages in \fIlist\fP. Until a '\-' is encountered, each +language in the list will be added to the current list. As either the '+' or +\&'\-' is encountered in the list, the languages following it are added or +removed from the current list, respectively. Thus, it becomes simple to +replace the current list with a new one, or to add or remove languages from +the current list.
The actual list of files for which tags will be generated +depends upon the language extension mapping in effect (see the \fB\-\-langmap\fP +option). Note that all languages, including user-defined languages, are enabled +unless explicitly disabled using this option. Language names included in +\fIlist\fP may be any built-in language or one previously defined with +\fB\-\-langdef\fP. The default is "all", which is also accepted as a valid +argument. See the \fB\-\-list\-languages\fP option for a complete list of the +built-in language names. + +.TP 5 +\fB\-\-license\fP +Prints a summary of the software license to standard output, and then exits. + +.TP 5 +\fB\-\-line\-directives\fP[=\fIyes\fP|\fIno\fP] +Specifies whether "#line" directives should be recognized. These are present +in the output of preprocessors and contain the line number, and possibly the +file name, of the original source file(s) from which the preprocessor output +file was generated. When enabled, this option will cause \fBctags\fP to +generate tag entries marked with the file names and line numbers of their +locations in the original source file(s), instead of their actual locations in the +preprocessor output. The actual file names placed into the tag file will have +the same leading path components as the preprocessor output file, since it is +assumed that the original source files are located relative to the +preprocessor output file (unless, of course, the #line directive specifies an +absolute path). This option is off by default. \fBNote:\fP This option is +generally only useful when used together with the \fB\-\-excmd\fP=\fInumber\fP +(\fB\-n\fP) option. Also, you may have to use either the \fB\-\-langmap\fP or +\fB\-\-language\-force\fP option if the extension of the preprocessor output file +is not known to \fBctags\fP. + +.TP 5 +\fB\-\-links\fP[=\fIyes\fP|\fIno\fP] +Indicates whether symbolic links (if supported) should be followed. When +disabled, symbolic links are ignored.
This option is on by default. + +.TP 5 +\fB\-\-list\-kinds\fP[=\fIlanguage\fP|\fIall\fP] +Lists the tag kinds recognized for either the specified language or all +languages, and then exits. Each kind of tag recorded in the tag file is +represented by a one-letter flag, which is also used to filter the tags placed +into the output through use of the \fB\-\-\-kinds\fP option. Note that some +languages and/or tag kinds may be implemented using regular expressions and +may not be available if regex support is not compiled into \fBctags\fP (see +the \fB\-\-regex\-\fP option). Each kind listed is enabled unless followed +by "[off]". + +.TP 5 +\fB\-\-list\-maps\fP[=\fIlanguage\fP|\fIall\fP] +Lists the file extensions and file name patterns which associate a file name +with a language for either the specified language or all languages, and then +exits. See the \fB\-\-langmap\fP option, and \fBSOURCE FILES\fP, above. + +.TP 5 +\fB\-\-list\-languages\fP +Lists the names of the languages understood by \fBctags\fP, and then exits. +These language names are case insensitive and may be used in the +\fB\-\-language\-force\fP, \fB\-\-languages\fP, \fB\-\-\-kinds\fP, and +\fB\-\-regex\-\fP options. + +.TP 5 +\fB\-\-options\fP=\fIfile\fP +Read additional options from \fIfile\fP. The file should contain one option +per line. As a special case, if +\fB\-\-options\fP=\fINONE\fP is specified as the first option on the command +line, it will disable the automatic reading of any configuration options from +either a file or the environment (see \fBFILES\fP). + +.TP 5 +\fB\-\-recurse\fP[=\fIyes\fP|\fIno\fP] +Recurse into directories encountered in the list of supplied files. If the +list of supplied files is empty and no file list is specified with the +\fB\-L\fP option, then the current directory (i.e. ".") is assumed. Symbolic +links are followed. If you don't like these behaviors, either explicitly +specify the files or pipe the output of \fBfind\fP(1) into \fBctags \-L\-\fP +instead. 
\fBNote:\fP This option is not supported on all platforms at present. +It is available if the output of the \fB\-\-help\fP option includes this option. +See, also, the \fB\-\-exclude\fP to limit recursion. + +.TP 5 +\fB\-\-regex\-\fP=\fI/regexp/replacement/[kind\-spec/][flags]\fP +The \fI/regexp/replacement/\fP pair define a regular expression replacement +pattern, similar in style to \fBsed\fP substitution commands, with which to +generate tags from source files mapped to the named language, \fB\fP, +(case-insensitive; either a built-in or user-defined language). The regular +expression, \fIregexp\fP, defines an extended regular expression (roughly that +used by \fBegrep\fP(1)), which is used to locate a single source line +containing a tag and may specify tab characters using \\t. When a matching +line is found, a tag will be generated for the name defined by +\fIreplacement\fP, which generally will contain the special back-references +\\1 through \\9 to refer to matching sub-expression groups within +\fIregexp\fP. The '/' separator characters shown in the parameter to the +option can actually be replaced by any character. Note that whichever +separator character is used will have to be escaped with a backslash ('\\') +character wherever it is used in the parameter as something other than a +separator. The regular expression defined by this option is added to the +current list of regular expressions for the specified language unless the +parameter is omitted, in which case the current list is cleared. + +Unless modified by \fIflags\fP, \fIregexp\fP is interpreted as a Posix +extended regular expression. The \fIreplacement\fP should expand for all +matching lines to a non-empty string of characters, or a warning message will +be reported. An optional kind specifier for tags matching \fIregexp\fP may +follow \fIreplacement\fP, which will determine what kind of tag is reported in +the "kind" extension field (see \fBTAG FILE FORMAT\fP, below). 
The full form +of \fIkind\-spec\fP is in the form of a single letter, a comma, a name (without +spaces), a comma, a description, followed by a separator, which specify +the short and long forms of the kind value and its textual description +(displayed using \fB\-\-list\-kinds\fP). Either the kind name and/or the +description may be omitted. If \fIkind\-spec\fP is omitted, it defaults to +"\fIr,regex\fP". Finally, \fIflags\fP are one or more single-letter characters +having the following effect upon the interpretation of \fIregexp\fP: + +.PP +.RS 8 +.TP 4 +.I b +The pattern is interpreted as a Posix basic regular expression. +.TP 4 +.I e +The pattern is interpreted as a Posix extended regular expression (default). +.TP 4 +.I i +The regular expression is to be applied in a case-insensitive manner. +.RE + +.RS 5 +Note that this option is available only if \fBctags\fP was compiled with +support for regular expressions, which depends upon your platform. You can +determine if support for regular expressions is compiled in by examining the +output of the \fB\-\-version\fP option, which will include "+regex" in the +compiled feature list. + +For more information on the regular expressions used by \fBctags\fP, see +either the \fBregex(5,7)\fP man page, or the GNU info documentation for regex +(e.g. "info regex"). +.RE + +.TP 5 +\fB\-\-sort\fP[=\fIyes\fP|\fIno\fP|\fIfoldcase\fP] +Indicates whether the tag file should be sorted on the tag name (default is +\fIyes\fP). Note that the original \fBvi\fP(1) required sorted tags. +The \fIfoldcase\fP value specifies case insensitive (or case-folded) sorting. +Fast binary searches of tag files sorted with case-folding will require +special support from tools using tag files, such as that found in the ctags +readtags library, or Vim version 6.2 or higher (using "set ignorecase"). This +option must appear before the first file name. 
[Ignored in etags mode] + +.TP 5 +\fB\-\-tag\-relative\fP[=\fIyes\fP|\fIno\fP] +Indicates that the file paths recorded in the tag file should be relative to +the directory containing the tag file, rather than relative to the current +directory, unless the files supplied on the command line are specified with +absolute paths. This option must appear before the first file name. The +default is \fIyes\fP when running in etags mode (see the \fB\-e\fP +option), \fIno\fP otherwise. + +.TP 5 +\fB\-\-totals\fP[=\fIyes\fP|\fIno\fP] +Prints statistics about the source files read and the tag file written during +the current invocation of \fBctags\fP. This option is off by default. +This option must appear before the first file name. + +.TP 5 +\fB\-\-verbose\fP[=\fIyes\fP|\fIno\fP] +Enable verbose mode. This prints out information on option processing and a +brief message describing what action is being taken for each file considered +by \fBctags\fP. Normally, \fBctags\fP does not read command line arguments +until after options are read from the configuration files (see \fBFILES\fP, +below) and the \fBCTAGS\fP environment variable. However, if this option is +the first argument on the command line, it will take effect before any options +are read from these sources. The default is \fIno\fP. + +.TP 5 +\fB\-\-version\fP +Prints a version identifier for \fBctags\fP to standard output, and then +exits. This is guaranteed to always contain the string "Exuberant Ctags". + + +.SH "OPERATIONAL DETAILS" + +As \fBctags\fP considers each file name in turn, it tries to determine the +language of the file by applying the following three tests in order: if the +file extension has been mapped to a language, if the file name matches a shell +pattern mapped to a language, and finally if the file is executable and its +first line specifies an interpreter using the Unix-style "#!" specification +(if supported on the platform). 
If a language was identified, the file is +opened and then the appropriate language parser is called to operate on the +currently open file. The parser parses through the file and adds an entry to +the tag file for each language object it is written to handle. See +\fBTAG FILE FORMAT\fP, below, for details on these entries. + +This implementation of \fBctags\fP imposes no formatting requirements on C +code as do legacy implementations. Older implementations of ctags tended to +rely upon certain formatting assumptions in order to help it resolve coding +dilemmas caused by preprocessor conditionals. + +In general, \fBctags\fP tries to be smart about conditional preprocessor +directives. If a preprocessor conditional is encountered within a statement +which defines a tag, \fBctags\fP follows only the first branch of that +conditional (except in the special case of "#if 0", in which case it follows +only the last branch). The reason for this is that failing to pursue only one +branch can result in ambiguous syntax, as in the following example: + +.RS +#ifdef TWO_ALTERNATIVES +.br +struct { +.br +#else +.br +union { +.br +#endif +.RS 4 +short a; +.br +long b; +.RE +} +.RE + +Both branches cannot be followed, or braces become unbalanced and \fBctags\fP +would be unable to make sense of the syntax. + +If the application of this heuristic fails to properly parse a file, +generally due to complicated and inconsistent pairing within the conditionals, +\fBctags\fP will retry the file using a different heuristic which does not +selectively follow conditional preprocessor branches, but instead falls back +to relying upon a closing brace ("}") in column 1 as indicating the end of a +block once any brace imbalance results from following a #if conditional branch. 
+ +\fBCtags\fP will also try to specially handle argument lists enclosed in +double sets of parentheses in order to accept the following conditional +construct: + +.RS +extern void foo __ARGS((int one, char two)); +.RE + +Any name immediately preceding the "((" will be automatically ignored and +the previous name will be used. + +C++ operator definitions are specially handled. In order for consistency with +all types of operators (overloaded and conversion), the operator name in the +tag file will always be preceded by the string "operator " (i.e. even if the +actual operator definition was written as "operator<<"). + +After creating or appending to the tag file, it is sorted by the tag name, +removing identical tag lines. + + +.SH "TAG FILE FORMAT" + +When not running in etags mode, each entry in the tag file consists of a +separate line, each looking like this in the most general case: + +.RS 1 +tag_name<TAB>file_name<TAB>ex_cmd;"<TAB>extension_fields +.RE + +The fields and separators of these lines are specified as follows: + +.PD 0 +.RS 4 +.TP 4 +1. +tag name +.TP 4 +2. +single tab character +.TP 4 +3. +name of the file in which the object associated with the tag is located +.TP 4 +4. +single tab character +.TP 4 +5. +EX command used to locate the tag within the file; generally a search pattern +(either /pattern/ or ?pattern?) or line number (see \fB\-\-excmd\fP). Tag file +format 2 (see \fB\-\-format\fP) extends this EX command under certain +circumstances to include a set of extension fields (described below) embedded +in an EX comment immediately appended to the EX command, which leaves it +backward-compatible with original \fBvi\fP(1) implementations. +.RE +.PD 1 + +A few special tags are written into the tag file for internal purposes. These +tags are composed in such a way that they always sort to the top of the file.
+Therefore, the first two characters of these tags are used as a magic number to +detect a tag file for purposes of determining whether a valid tag file is +being overwritten rather than a source file. + +Note that the name of each source file will be recorded in the tag file +exactly as it appears on the command line. Therefore, if the path you +specified on the command line was relative to the current directory, then it +will be recorded in that same manner in the tag file. See, however, the +\fB\-\-tag\-relative\fP option for how this behavior can be modified. + +Extension fields are tab-separated key-value pairs appended to the end of the +EX command as a comment, as described above. These key-value pairs appear in +the general form "\fIkey\fP:\fIvalue\fP". Their presence in the lines of the +tag file is controlled by the \fB\-\-fields\fP option. The possible keys and +the meaning of their values are as follows: + +.TP 12 +.I access +Indicates the visibility of this class member, where \fIvalue\fP is specific +to the language. + +.TP 12 +.I file +Indicates that the tag has file-limited visibility. This key has no +corresponding value. + +.TP 12 +.I kind +Indicates the type, or kind, of tag. Its value is either one of the +corresponding one-letter flags described under the various +\fB\-\-<LANG>\-kinds\fP options above, or a full name. It is permitted (and is, +in fact, the default) for the key portion of this field to be omitted. The +optional behaviors are controlled with the \fB\-\-fields\fP option. + +.TP 12 +.I implementation +When present, this indicates a limited implementation (abstract vs. concrete) +of a routine or class, where \fIvalue\fP is specific to the language +("virtual" or "pure virtual" for C++; "abstract" for Java). + +.TP 12 +.I inherits +When present, \fIvalue\fP is a comma-separated list of classes from which +this class is derived (i.e. inherits from).
+ +.TP 12 +.I signature +When present, \fIvalue\fP is a language-dependent representation of the +signature of a routine. A routine signature in its complete form specifies the +return type of a routine and its formal argument list. This extension field is +presently supported only for C-based languages and does not include the return +type. + +.PP +In addition, information on the scope of the tag definition may be available, +with the key portion equal to some language-dependent construct name and its +value the name declared for that construct in the program. This scope entry +indicates the scope in which the tag was found. For example, a tag generated +for a C structure member would have a scope looking like "struct:myStruct". + + +.SH "HOW TO USE WITH VI" +Vi will, by default, expect a tag file by the name "tags" in the current +directory. Once the tag file is built, the following commands exercise the tag +indexing feature: +.TP 12 +.B vi \-t tag +Start vi and position the cursor at the file and line where "tag" is defined. +.TP 12 +.B :ta tag +Find a tag. +.TP 12 +.B Ctrl-] +Find the tag under the cursor. +.TP 12 +.B Ctrl-T +Return to previous location before jump to tag (not widely implemented). + + +.SH "HOW TO USE WITH GNU EMACS" +Emacs will, by default, expect a tag file by the name "TAGS" in the current +directory. Once the tag file is built, the following commands exercise the +tag indexing feature: +.TP 10 +.B "M-x visit\-tags\-table FILE " +Select the tag file, "FILE", to use. +.TP 10 +.B "M-. [TAG] " +Find the first definition of TAG. The default tag is the identifier under the +cursor. +.TP 10 +.B "M-*" +Pop back to where you previously invoked "M-.". +.TP 10 +.B "C-u M-." +Find the next definition for the last tag. + +.PP +For more commands, see the \fITags\fP topic in the Emacs info document. + + +.SH "HOW TO USE WITH NEDIT" +NEdit version 5.1 and later can handle the new extended tag file format (see +\fB\-\-format\fP). 
To make NEdit use the tag file, select "File\->Load Tags +File". To jump to the definition for a tag, highlight the word, then press +Ctrl-D. NEdit 5.1 can read multiple tag files from different directories. +Setting the X resource nedit.tagFile to the name of a tag file instructs NEdit +to automatically load that tag file at startup time. + + +.SH "CAVEATS" +Because \fBctags\fP is neither a preprocessor nor a compiler, use of +preprocessor macros can fool \fBctags\fP into either missing tags or +improperly generating inappropriate tags. Although \fBctags\fP has been +designed to handle certain common cases, this is the single biggest cause of +reported problems. In particular, the use of preprocessor constructs which +alter the textual syntax of C can fool \fBctags\fP. You can work around many +such problems by using the \fB\-I\fP option. + +Note that since \fBctags\fP generates patterns for locating tags (see +the \fB\-\-excmd\fP option), it is entirely possible that the wrong line may be +found by your editor if there exists another source line which is identical to +the line containing the tag. The following example demonstrates this condition: + +.RS +int variable; + +/* ... */ +.br +void foo(variable) +.br +int variable; +.br +{ +.RS 4 +/* ... */ +.RE +} +.RE + +Depending upon which editor you use and where in the code you happen to be, it +is possible that the search pattern may locate the local parameter declaration +in foo() before it finds the actual global variable definition, since the +lines (and therefore their search patterns) are identical. This can be avoided +by use of the \fB\-\-excmd\fP=\fIn\fP option. + + +.SH "BUGS" +\fBCtags\fP has more options than \fBls\fP(1). + +When parsing a C++ member function definition (e.g. "className::function"), +\fBctags\fP cannot determine whether the scope specifier is a class name or a +namespace specifier and always lists it as a class name in the scope portion +of the extension fields.
Also, if a C++ function is defined outside of the +class declaration (the usual case), the access specification (i.e. public, +protected, or private) and implementation information (e.g. virtual, pure +virtual) contained in the function declaration are not known when the tag is +generated for the function definition. It will, however, be available for +prototypes (e.g. \fB\-\-c++\-kinds\fP=\fI+p\fP). + +No qualified tags are generated for language objects inherited into a class. + + +.SH "ENVIRONMENT VARIABLES" + +.TP 8 +.B CTAGS +If this environment variable exists, it will be expected to contain a set of +default options which are read when \fBctags\fP starts, after the +configuration files listed in \fBFILES\fP, below, are read, but before any +command line options are read. Options appearing on the command line will +override options specified in this variable. Only options will be read from +this variable. Note that all white space in this variable is considered a +separator, making it impossible to pass an option parameter containing an +embedded space. If this is a problem, use a configuration file instead. + +.TP 8 +.B ETAGS +Similar to the \fBCTAGS\fP variable above, this variable, if found, will be +read when \fBetags\fP starts. If this variable is not found, \fBetags\fP will +try to use \fBCTAGS\fP instead. + +.TP 8 +.B TMPDIR +On Unix-like hosts where mkstemp() is available, the value of this variable +specifies the directory in which to place temporary files. This can be useful +if the size of a temporary file becomes too large to fit on the partition +holding the default temporary directory defined at compilation time. +\fBctags\fP creates temporary files only if either (1) an emacs-style tag file +is being generated, (2) the tag file is being sent to standard output, or (3) +the program was compiled to use an internal sort algorithm to sort the tag +files instead of the sort utility of the operating system.
If the sort +utility of the operating system is being used, it will generally observe this +variable also. Note that if \fBctags\fP is setuid, the value of TMPDIR will be +ignored. + + +.SH "FILES" +.PD 0 +.I /ctags.cnf (on MSDOS, MSWindows only) +.br +.I /etc/ctags.conf +.br +.I /usr/local/etc/ctags.conf +.br +.I $HOME/.ctags +.br +.I $HOME/ctags.cnf (on MSDOS, MSWindows only) +.br +.I .ctags +.br +.I ctags.cnf (on MSDOS, MSWindows only) +.IP +If any of these configuration files exist, each will be expected to contain a +set of default options which are read in the order listed when \fBctags\fP +starts, but before the \fBCTAGS\fP environment variable is read or any command +line options are read. This makes it possible to set up site-wide, personal +or project-level defaults. It is possible to compile \fBctags\fP to read an +additional configuration file before any of those shown above, which will be +indicated if the output produced by the \fB\-\-version\fP option lists the +"custom-conf" feature. Options appearing in the \fBCTAGS\fP environment +variable or on the command line will override options specified in these +files. Only options will be read from these files. Note that the option files +are read in line-oriented mode in which spaces are significant (since +shell quoting is not possible). Each line of the file is read as one command +line parameter (as if it were quoted with single quotes). Therefore, use new +lines to indicate separate command-line arguments. +.PD 1 + +.TP +.I tags +The default tag file created by \fBctags\fP. +.TP +.I TAGS +The default tag file created by \fBetags\fP. + +.SH "SEE ALSO" +The official Exuberant Ctags web site at: + +.RS +http://ctags.sourceforge.net +.RE + +Also \fBex\fP(1), \fBvi\fP(1), \fBelvis\fP, or, better yet, \fBvim\fP, the +official editor of \fBctags\fP. 
For more information on \fBvim\fP, see the VIM +Pages web site at: + +.RS +http://www.vim.org/ +.RE + + +.SH "AUTHOR" +Darren Hiebert +.br +http://DarrenHiebert.com/ + + +.SH "MOTIVATION" +"Think ye at all times of rendering some service to every member of the human +race." + +"All effort and exertion put forth by man from the fullness of his heart is +worship, if it is prompted by the highest motives and the will to do service +to humanity." + +.RS +\-\- From the Baha'i Writings +.RE + + +.SH "CREDITS" +This version of \fBctags\fP was originally derived from and inspired by the +ctags program by Steve Kirkendall that comes with the +Elvis vi clone (though virtually none of the original code remains). + +Credit is also due Bram Moolenaar , the author of \fBvim\fP, who +has devoted so much of his time and energy both to developing the editor as a +service to others, and to helping the orphans of Uganda. + +The section entitled "HOW TO USE WITH GNU EMACS" was shamelessly stolen from +the info page for GNU \fBetags\fP. diff --git a/ctags.h b/ctags.h new file mode 100644 index 0000000..f8884af --- /dev/null +++ b/ctags.h @@ -0,0 +1,28 @@ +/* +* $Id: ctags.h 702 2009-03-14 03:52:21Z dhiebert $ +* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Program definitions +*/ +#ifndef _CTAGS_H +#define _CTAGS_H + +/* +* MACROS +*/ +#ifndef PROGRAM_VERSION +# define PROGRAM_VERSION "5.8" +#endif +#define PROGRAM_NAME "Exuberant Ctags" +#define PROGRAM_URL "http://ctags.sourceforge.net" +#define PROGRAM_COPYRIGHT "Copyright (C) 1996-2009" +#define AUTHOR_NAME "Darren Hiebert" +#define AUTHOR_EMAIL "dhiebert@users.sourceforge.net" + +#endif /* _CTAGS_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/ctags.html b/ctags.html new file mode 100644 index 0000000..6ff0fdd --- /dev/null +++ b/ctags.html @@ -0,0 +1,2087 @@ + + + + + + + + + +CTAGS + + + + +

CTAGS

+ +NAME
+SYNOPSIS
+DESCRIPTION
+SOURCE FILES
+OPTIONS
+OPERATIONAL DETAILS
+TAG FILE FORMAT
+HOW TO USE WITH VI
+HOW TO USE WITH GNU EMACS
+HOW TO USE WITH NEDIT
+CAVEATS
+BUGS
+ENVIRONMENT VARIABLES
+FILES
+SEE ALSO
+AUTHOR
+MOTIVATION
+CREDITS
+ +
+ + + +

NAME

+ + +

ctags − +Generate tag files for source code

+ + +

SYNOPSIS

+ + +

ctags +[options] [file(s)]
+etags
[options] [file(s)]

+ + +

DESCRIPTION

+ + +

The +ctags and etags programs (hereinafter +collectively referred to as ctags, except where +distinguished) generate an index (or "tag") file +for a variety of language objects found in file(s). +This tag file allows these items to be quickly and easily +located by a text editor or other utility. A "tag" +signifies a language object for which an index entry is +available (or, alternatively, the index entry created for +that object).

+ +

Alternatively, +ctags can generate a cross reference file which +lists, in human readable form, information about the various +source objects found in a set of language files.

+ +

Tag index files +are supported by numerous editors, which allow the user to +locate the object associated with a name appearing in a +source file and jump to the file and line which defines the +name. Those known about at the time of this release are:

+ +

Vi(1) +and its derivatives (e.g. Elvis, Vim, Vile, Lemmy), +CRiSP, Emacs, FTE (Folding Text +Editor), JED, jEdit, Mined, +NEdit (Nirvana Edit), TSE (The SemWare +Editor), UltraEdit, WorkSpace, X2, +Zeus

+ +

Ctags is +capable of generating different kinds of tags for each of +many different languages. For a complete list of supported +languages, the names by which they are recognized, and the +kinds of tags which are generated for each, see the +−−list−languages and +−−list−kinds options.

+ + +

SOURCE FILES

+ + +

Unless the +−−language−force option is +specified, the language of each source file is automatically +selected based upon a mapping of file names to languages. +The mappings in effect for each language may be displayed +using the −−list−maps option and +may be changed using the −−langmap +option. On platforms which support it, if the name of a file +is not mapped to a language and the file is executable, the +first line of the file is checked to see if the file is a +"#!" script for a recognized language.

+ +

By default, all +other file names are ignored. This permits running +ctags on all files in either a single directory (e.g. +"ctags *"), or on all files in an entire source +directory tree (e.g. "ctags −R"), since only +those files whose names are mapped to languages will be +scanned.

+ +

[The reason +that .h extensions are mapped to C++ files rather than C +files is because it is common to use .h extensions in C++, +and no harm results in treating them as C++ files.]

+ + +

OPTIONS

+ + +

Despite the +wealth of available options, defaults are set so that +ctags is most commonly executed without any options +(e.g. "ctags *", or "ctags −R"), +which will create a tag file in the current directory for +all recognized source files. The options described below are +provided merely to allow custom tailoring to meet special +needs.

+ +

Note that +spaces separating the single-letter options from their +parameters are optional.

+ +

Note also that +the boolean parameters to the long form options (those +beginning with "−−" and that take a +"[=yes|no]" parameter) may be +omitted, in which case "=yes" is +implied. (e.g. −−sort is equivalent to +−−sort=yes). Note further that +"=1" and "=on" are +considered synonyms for "=yes", and that +"=0" and "=off" are +considered synonyms for "=no".

+ +

Some options +are either ignored or useful only when used while running in +etags mode (see −e option). Such options will +be noted.

+ +

Most options +may appear anywhere on the command line, affecting only +those files which follow the option. A few options, however, +must appear before the first file name and will be noted as +such.

+ +

Options taking +language names will accept those names in either upper or +lower case. See the +−−list−languages option for a +complete list of the built-in language names.

+ + + + + + + + + + + + + + + + + +
+ + + +

−a

+ + +

Equivalent to +−−append.

+ + +

−B

+ + +

Use backward searching patterns (e.g. ?pattern?). +[Ignored in etags mode]

+ + +

−e

+ + +

Enable etags mode, which will create a tag file for use +with the Emacs editor. Alternatively, if ctags is +invoked by a name containing the string "etags" +(either by renaming, or creating a link to, the executable), +etags mode will be enabled. This option must appear before +the first file name.

+ +

−f +tagfile

+ +

Use the name specified by +tagfile for the tag file (default is +"tags", or "TAGS" when running in etags +mode). If tagfile is specified as +"−", then the tag file is written to +standard output instead. Ctags will stubbornly refuse +to take orders if tagfile exists and its first line +contains something other than a valid tags line. This will +save your neck if you mistakenly type "ctags −f +*.c", which would otherwise overwrite your first C file +with the tags generated by the rest! It will also refuse to +accept a multi-character file name which begins with a +’−’ (dash) character, since this most +likely means that you left out the tag file name and this +option tried to grab the next option as the file name. If +you really want to name your output tag file +"−ugly", specify it as +"./−ugly". This option must appear before +the first file name. If this option is specified more than +once, only the last will apply.

+ + + + + + + +
+ + + +

−F

+ + +

Use forward +searching patterns (e.g. /pattern/) (default). [Ignored in +etags mode]

+ +

−h list

+ +

Specifies a list of file +extensions, separated by periods, which are to be +interpreted as include (or header) files. To indicate files +having no extension, use a period not followed by a +non-period character (e.g. ".", "..x", +".x."). This option only affects how the scoping +of particular kinds of tags is interpreted (i.e. whether +or not they are considered as globally visible or visible +only within the file in which they are defined); it does not +map the extension to any particular language. Any tag which +is located in a non-include file and cannot be seen (e.g. +linked to) from another file is considered to have +file-limited (e.g. static) scope. No kind of tag appearing +in an include file will be considered to have file-limited +scope. If the first character in the list is a plus sign, +then the extensions in the list will be appended to the +current list; otherwise, the list will replace the current +list. See, also, the −−file−scope +option. The default list is +".h.H.hh.hpp.hxx.h++.inc.def". To restore the +default list, specify −h default. Note +that if an extension supplied to this option is not already +mapped to a particular language (see SOURCE FILES, +above), you will also need to use either the +−−langmap or +−−language−force option.

+ +

−I +identifier−list

+ +

Specifies a list of identifiers +which are to be specially handled while parsing C and C++ +source files. This option is specifically provided to handle +special cases arising through the use of preprocessor +macros. When the identifiers listed are simple identifiers, +these identifiers will be ignored during parsing of the +source files. If an identifier is suffixed with a +’+’ character, ctags will also ignore any +parenthesis-enclosed argument list which may immediately +follow the identifier in the source files. If two +identifiers are separated with the ’=’ +character, the first identifier is replaced by the second +identifier for parsing purposes. The list of identifiers +may be supplied directly on the command line or read in from +a separate file. If the first character of +identifier−list is ’@’, +’.’ or a pathname separator (’/’ or +’\’), or the first two characters specify a +drive letter (e.g. "C:"), the parameter +identifier−list will be interpreted as a +filename from which to read a list of identifiers, one per +input line. Otherwise, identifier−list is a +list of identifiers (or identifier pairs) to be specially +handled, each delimited by either a comma or by white +space (in which case the list should be quoted to keep the +entire list as one command line argument). Multiple +−I options may be supplied. To clear the list +of ignore identifiers, supply a single dash +("−") for identifier−list.

+ +

This feature is +useful when preprocessor macros are used in such a way that +they cause syntactic confusion due to their presence. +Indeed, this is the best way of working around a number of +problems caused by the presence of syntax-busting macros in +source files (see CAVEATS, below). Some examples will +illustrate this point.

+ +

int foo +ARGDECL4(void *, ptr, long int, nbytes)

+ +

In the above +example, the macro "ARGDECL4" would be mistakenly +interpreted to be the name of the function instead of the +correct name of "foo". Specifying −I +ARGDECL4 results in the correct behavior.

+ +

/* creates an +RCS version string in module */
+MODULE_VERSION("$Revision: 690 $")

+ +

In the above +example the macro invocation looks too much like a function +definition because it is not followed by a semicolon +(indeed, it could even be followed by a global variable +definition that would look much like a K&R style +function parameter declaration). In fact, this seeming +function definition could possibly even cause the rest of +the file to be skipped over while trying to complete the +definition. Specifying −I +MODULE_VERSION+ would avoid such a problem.

+ +

CLASS Example { +
+// your content here
+};

+ +

The example +above uses "CLASS" as a preprocessor macro which +expands to something different for each platform. For +instance CLASS may be defined as "class +__declspec(dllexport)" on Win32 platforms and simply +"class" on UNIX. Normally, the absence of the C++ +keyword "class" would cause the source file to be +incorrectly parsed. Correct behavior can be restored by +specifying −I CLASS=class.

+ +

−L file

+ +

Read from file a list of +file names for which tags should be generated. If +file is specified as "−", then file +names are read from standard input. File names read using +this option are processed following file names appearing on +the command line. Options are also accepted in this input. +If this option is specified more than once, only the last +will apply. Note: file is read in +line-oriented mode, where a new line is the only delimiter +and non-trailing white space is considered significant, in +order that file names containing spaces may be supplied +(however, trailing white space is stripped from lines); this +can affect how options are parsed if included in the +input.

+ + + + + + + + + + + + + + +
+ + + +

−n

+ + +

Equivalent to +−−excmd=number.

+
+ + +

−N

+ + +

Equivalent to +−−excmd=pattern.

+
+ +

−o +tagfile

+ +

Equivalent to −f +tagfile.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +

−R

+ + +

Equivalent to +−−recurse.

+ + +

−u

+ + +

Equivalent to −−sort=no (i.e. +"unsorted").

+ + +

−V

+ + +

Equivalent to −−verbose.

+ + +

−w

+ + +

This option is silently ignored for +backward-compatibility with the ctags of SVR4 Unix.

+ + +

−x

+ + +

Print a tabular, human-readable cross reference (xref) +file to standard output instead of generating a tag file. +The information contained in the output includes: the tag +name; the kind of tag; the line number, file name, and +source line (with extra white space condensed) of the file +which defines the tag. No tag file is written and all +options affecting tag file output will be ignored. Example +applications for this feature are generating a listing of +all functions located in a source file (e.g. ctags +−x −−c−kinds=f file), or +generating a list of all externally visible global variables +located in a source file (e.g. ctags −x +−−c−kinds=v +−−file−scope=no file). This +option must appear before the first file name.

+ + +

−−append[=yes|no]

+ +

Indicates whether tags +generated from the specified files should be appended to +those already present in the tag file or should replace +them. This option is off by default. This option must appear +before the first file name.

+ + +

−−etags−include=file

+ +

Include a reference to +file in the tag file. This option may be specified as +many times as desired. This supports Emacs’ capability +to use a tag file which "includes" other tag +files. [Available only in etags mode]

+ + +

−−exclude=[pattern]

+ +

Add pattern to a list of +excluded files and directories. This option may be specified +as many times as desired. For each file name considered by +ctags, each pattern specified using this +option will be compared against both the complete path (e.g. +some/path/base.ext) and the base name (e.g. base.ext) of the +file, thus allowing patterns which match a given file name +irrespective of its path, or match only a specific path. If +appropriate support is available from the runtime library of +your C compiler, then pattern may contain the usual +shell wildcards (not regular expressions) common on Unix (be +sure to quote the option parameter to protect the wildcards +from being expanded by the shell before being passed to +ctags; also be aware that wildcards can match the +slash character, ’/’). You can determine if +shell wildcards are available on your platform by examining +the output of the −−version option, which +will include "+wildcards" in the compiled feature +list; otherwise, pattern is matched against file +names using a simple textual comparison.

+ +

If +pattern begins with the character ’@’, +then the rest of the string is interpreted as a file name +from which to read exclusion patterns, one per line. If +pattern is empty, the list of excluded patterns is +cleared. Note that at program startup, the default exclude +list contains "EIFGEN", "SCCS", +"RCS", and "CVS", which are names of +directories for which it is generally not desirable to +descend while processing the −−recurse +option.

+ + +

−−excmd=type

+ +

Determines the type of EX +command used to locate tags in the source file. [Ignored in +etags mode]

+ +

The valid +values for type (either the entire word or the first +letter is accepted) are:

+ + + + + + + +
+ + +

number

+ + +

Use only line +numbers in the tag file for locating tags. This has four +advantages:

+ + + + + + + + + + + + + + + + + + + + + + +
+ + +

1.

+ + +

Significantly reduces the size of the +resulting tag file.

+ + +

2.

+ + +

Eliminates failures to find tags because +the line defining the tag has changed, causing the pattern +match to fail (note that some editors, such as vim, +are able to recover in many such instances).

+ + +

3.

+ + +

Eliminates finding identical matching, but +incorrect, source lines (see BUGS, below).

+ + +

4.

+ + +

Retains separate entries in the tag file +for lines which are identical in content. In pattern +mode, duplicate entries are dropped because the search +patterns they generate are identical, making the duplicate +entries useless.

+ +

However, this +option has one significant drawback: changes to the source +files can cause the line numbers recorded in the tag file to +no longer correspond to the lines in the source file, +causing jumps to some tags to miss the target definition by +one or more lines. Basically, this option is best used when +the source code to which it is applied is not subject to +change. Selecting this option type causes the following +options to be ignored: −BF.

+ + + + + + + + + + + + +
+ + +

pattern

+ + +

Use only search +patterns for all tags, rather than the line numbers usually +used for macro definitions. This has the advantage of not +referencing obsolete line numbers when lines have been added +or removed since the tag file was generated.

+ + +

mixed

+ + +

In this mode, patterns are generally used with a few +exceptions. For C, line numbers are used for macro +definition tags. This was the default format generated by +the original ctags and is, therefore, retained as the +default for this option. For Fortran, line numbers are used +for common blocks because their corresponding source lines +are generally identical, making pattern searches useless for +finding all matches.

+ + +

−−extra=[+|−]flags

+ +

Specifies whether to include +extra tag entries for certain kinds of information. The +parameter flags is a set of one-letter flags, each +representing one kind of extra tag entry to include in the +tag file. If flags is preceded by either the +’+’ or ’−’ character, the +effect of each flag is added to, or removed from, those +currently enabled; otherwise the flags replace any current +settings. The meaning of each flag is as follows:

+ + + + + + + + + + + + +
+ + +

f

+ + +

Include an entry +for the base file name of every source file (e.g. +"example.c"), which addresses the first line of +the file.

+ + +

q

+ + +

Include an extra +class-qualified tag entry for each tag which is a member of +a class (for languages for which this information is +extracted; currently C++, Eiffel, and Java). The actual form +of the qualified tag depends upon the language from which +the tag was derived (using a form that is most natural for +how qualified calls are specified in the language). For C++, +it is in the form "class::member"; for Eiffel and +Java, it is in the form "class.member". This may +allow easier location of a specific tag when multiple +occurrences of a tag name occur in the tag file. Note, +however, that this could potentially more than double the +size of the tag file.

+ + +

−−fields=[+|−]flags

+ +

Specifies the available +extension fields which are to be included in the entries of +the tag file (see TAG FILE FORMAT, below, for more +information). The parameter flags is a set of +one-letter flags, each representing one type of extension +field to include, with the following meanings (disabled by +default unless indicated):

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

a

+ + +

Access (or export) +of class members

+ + +

f

+ + +

File-restricted scoping [enabled]

+ + +

i

+ + +

Inheritance information

+ + +

k

+ + +

Kind of tag as a single letter +[enabled]

+ + +

K

+ + +

Kind of tag as full name

+ + +

l

+ + +

Language of source file containing tag

+ + +

m

+ + +

Implementation information

+ + +

n

+ + +

Line number of tag definition

+ + +

s

+ + +

Scope of tag definition [enabled]

+ + +

S

+ + +

Signature of routine (e.g. prototype or +parameter list)

+ + +

z

+ + +

Include the "kind:" key in kind +field

+ + +

t

+ + +

Type and name of a variable or typedef as +"typeref:" field [enabled]

+ +

Each letter or +group of letters may be preceded by either ’+’ +to add it to the default set, or ’−’ to +exclude it. In the absence of any preceding ’+’ +or ’−’ sign, only those kinds explicitly +listed in flags will be included in the output (i.e. +overriding the default set). This option is ignored if the +option −−format=1 has been +specified. The default value of this option is +fkst.

+ + +

−−file−scope[=yes|no]

+ +

Indicates whether tags scoped +only for a single file (i.e. tags which cannot be seen +outside of the file in which they are defined, such as +"static" tags) should be included in the output. +See, also, the −h option. This option is +enabled by default.

+ + +

−−filter[=yes|no]

+ +

Causes ctags to behave +as a filter, reading source file names from standard input +and printing their tags to standard output on a file-by-file +basis. If −−sort is enabled, tags are +sorted only within the source file in which they are +defined. File names are read from standard input in +line-oriented input mode (see note for −L +option) and only after file names listed on the command line +or from any file supplied using the −L option. +When this option is enabled, the options −f, +−o, and −−totals are +ignored. This option is quite esoteric and is disabled by +default. This option must appear before the first file +name.

+ + +

−−filter−terminator=string

+ +

Specifies a string to print to +standard output following the tags for each file name parsed +when the −−filter option is enabled. This +may permit an application reading the output of ctags to +determine when the output for each file is finished. Note +that if the file name read is a directory and +−−recurse is enabled, this string will be +printed only once at the end of all tags found by +descending the directory. This string will always be +separated from the last tag line for the file by its +terminating newline. This option is quite esoteric and is +empty by default. This option must appear before the first +file name.

+ + +

−−format=level

+ +

Change the format of the output +tag file. Currently the only valid values for level +are 1 or 2. Level 1 specifies the original tag +file format and level 2 specifies a new extended format +containing extension fields (but in a manner which retains +backward-compatibility with original vi(1) +implementations). The default level is 2. This option must +appear before the first file name. [Ignored in etags +mode]

+ +

−−help

+ +

Prints to standard output a +detailed usage description, and then exits.

+ + +

−−if0[=yes|no]

+ +

Indicates a preference as to +whether code within an "#if 0" branch of a +preprocessor conditional should be examined for non-macro +tags (macro tags are always included). Because the intent of +this construct is to disable code, the default value of this +option is no. Note that this indicates a preference +only and does not guarantee skipping code within an +"#if 0" branch, since the fall-back algorithm used +to generate tags when preprocessor conditionals are too +complex follows all branches of a conditional. This option +is disabled by default.

+ + +

−−<LANG>−kinds=[+|−]kinds

+ +

Specifies a list of +language-specific kinds of tags (or kinds) to include in the +output file for a particular language, where +<LANG> is case-insensitive and is one of the +built-in language names (see the +−−list−languages option for a +complete list). The parameter kinds is a group of +one-letter flags designating kinds of tags (particular to +the language) to either include or exclude from the output. +The specific sets of flags recognized for each language, +their meanings and defaults may be listed using the +−−list−kinds option. Each letter or +group of letters may be preceded by either ’+’ +to add it to, or ’−’ to remove it from, +the default set. In the absence of any preceding +’+’ or ’−’ sign, only those +kinds explicitly listed in kinds will be included in +the output (i.e. overriding the default for the specified +language).

+ +

As an example +for the C language, in order to add prototypes and external +variable declarations to the default set of tag kinds, but +exclude macros, use +−−c−kinds=+px−d; to +include only tags for functions, use +−−c−kinds=f.

+ + +

−−langdef=name

+ +

Defines a new user-defined +language, name, to be parsed with regular +expressions. Once defined, name may be used in other +options taking language names. The typical use of this +option is to first define the language, then map file names +to it using −−langmap, then specify +regular expressions using +−−regex−<LANG> to define how +its tags are found.

+ + +

−−langmap=map[,map[...]]

+ +

Controls how file names are +mapped to languages (see the +−−list−maps option). Each +comma-separated map consists of the language name +(either a built-in or user-defined language), a colon, and a +list of file extensions and/or file name patterns. A file +extension is specified by preceding the extension with a +period (e.g. ".c"). A file name pattern is +specified by enclosing the pattern in parentheses (e.g. +"([Mm]akefile)"). If appropriate support is +available from the runtime library of your C compiler, then +the file name pattern may contain the usual shell wildcards +common on Unix (be sure to quote the option parameter to +protect the wildcards from being expanded by the shell +before being passed to ctags). You can determine if +shell wildcards are available on your platform by examining +the output of the −−version option, which +will include "+wildcards" in the compiled feature +list; otherwise, the file name patterns are matched against +file names using a simple textual comparison. When mapping a +file extension, it will first be unmapped from any other +languages.

+ +

If the first +character in a map is a plus sign, then the extensions and +file name patterns in that map will be appended to the +current map for that language; otherwise, the map will +replace the current map. For example, to specify that only +files with extensions of .c and .x are to be treated as C +language files, use +"−−langmap=c:.c.x"; to +also add files with extensions of .j as Java language files, +specify +"−−langmap=c:.c.x,java:+.j". +To map makefiles (e.g. files named either +"Makefile", "makefile", or having the +extension ".mak") to a language called +"make", specify +"−−langmap=make:([Mm]akefile).mak". +To map files having no extension, specify a period not +followed by a non-period character (e.g. ".", +"..x", ".x."). To clear the mapping for +a particular language (thus inhibiting automatic generation +of tags for that language), specify an empty extension list +(e.g. +"−−langmap=fortran:"). +To restore the default language mappings for a +particular language, supply the keyword "default" +for the mapping. To restore the default language +mappings for all languages, specify +"−−langmap=default". +Note that file extensions are tested before file name +patterns when inferring the language of a file.

+ + +

−−language−force=language

+ +

By default, ctags +automatically selects the language of a source file, +ignoring those files whose language cannot be determined +(see SOURCE FILES, above). This option forces the +specified language (case-insensitive; either built-in +or user-defined) to be used for every supplied file instead +of automatically selecting the language based upon its +extension. In addition, the special value auto +indicates that the language should be automatically selected +(which effectively disables this option).

+ + +

−−languages=[+|−]list

+ +

Specifies the languages for +which tag generation is enabled, with list containing +a comma-separated list of language names (case-insensitive; +either built-in or user-defined). If the first language of +list is not preceded by either a ’+’ or +’−’, the current list will be cleared +before adding or removing the languages in list. +Until a ’−’ is encountered, each language +in the list will be added to the current list. As either the +’+’ or ’−’ is encountered in +the list, the languages following it are added or removed +from the current list, respectively. Thus, it becomes simple +to replace the current list with a new one, or to add or +remove languages from the current list. The actual list of +files for which tags will be generated depends upon the +language extension mapping in effect (see the +−−langmap option). Note that all +languages, including user-defined languages, are enabled +unless explicitly disabled using this option. Language names +included in list may be any built-in language or one +previously defined with −−langdef. The +default is "all", which is also accepted as a +valid argument. See the +−−list−languages option for a +complete list of the built-in language names.

+ + +

−−license

+ +

Prints a summary of the +software license to standard output, and then exits.

+ + +

−−line−directives[=yes|no]

+ +

Specifies whether +"#line" directives should be recognized. These are +present in the output of preprocessors and contain the line +number, and possibly the file name, of the original source +file(s) from which the preprocessor output file was +generated. When enabled, this option will cause ctags +to generate tag entries marked with the file names and line +numbers of their locations in the original source file(s), instead +of their actual locations in the preprocessor output. The +actual file names placed into the tag file will have the +same leading path components as the preprocessor output +file, since it is assumed that the original source files are +located relative to the preprocessor output file (unless, of +course, the #line directive specifies an absolute path). +This option is off by default. Note: This option is +generally only useful when used together with the +−−excmd=number (−n) +option. Also, you may have to use either the +−−langmap or +−−language−force option if the +extension of the preprocessor output file is not known to +ctags.

+ + +

−−links[=yes|no]

+ +

Indicates whether symbolic +links (if supported) should be followed. When disabled, +symbolic links are ignored. This option is on by +default.

+ + +

−−list−kinds[=language|all]

+ +

Lists the tag kinds recognized +for either the specified language or all languages, and then +exits. Each kind of tag recorded in the tag file is +represented by a one-letter flag, which is also used to +filter the tags placed into the output through use of the +−−<LANG>−kinds option. Note +that some languages and/or tag kinds may be implemented +using regular expressions and may not be available if regex +support is not compiled into ctags (see the +−−regex−<LANG> option). Each +kind listed is enabled unless followed by +"[off]".

+ + +

−−list−maps[=language|all]

+ +

Lists the file extensions and +file name patterns which associate a file name with a +language for either the specified language or all languages, +and then exits. See the −−langmap option, +and SOURCE FILES, above.

+ + +

−−list−languages

+ +

Lists the names of the +languages understood by ctags, and then exits. These +language names are case insensitive and may be used in the +−−language−force, +−−languages, +−−<LANG>−kinds, and +−−regex−<LANG> options.

+ + +

−−options=file

+ +

Read additional options from +file. The file should contain one option per line. As +a special case, if −−options=NONE +is specified as the first option on the command line, it +will disable the automatic reading of any configuration +options from either a file or the environment (see +FILES).

+ + +

−−recurse[=yes|no]

+ +

Recurse into directories +encountered in the list of supplied files. If the list of +supplied files is empty and no file list is specified with +the −L option, then the current directory (i.e. +".") is assumed. Symbolic links are followed. If +you don’t like these behaviors, either explicitly +specify the files or pipe the output of find(1) into +ctags −L− instead. Note: This +option is not supported on all platforms at present. It is +available if the output of the −−help +option includes this option. See, also, the +−−exclude option to limit recursion.

+ + +

−−regex−<LANG>=/regexp/replacement/[kind−spec/][flags]

+ +

The /regexp/replacement/ +pair define a regular expression replacement pattern, +similar in style to sed substitution commands, with +which to generate tags from source files mapped to the named +language, <LANG>, (case-insensitive; either a +built-in or user-defined language). The regular expression, +regexp, defines an extended regular expression +(roughly that used by egrep(1)), which is used to +locate a single source line containing a tag and may specify +tab characters using \t. When a matching line is found, a +tag will be generated for the name defined by +replacement, which generally will contain the special +back-references \1 through \9 to refer to matching +sub-expression groups within regexp. The +’/’ separator characters shown in the parameter +to the option can actually be replaced by any character. +Note that whichever separator character is used will have to +be escaped with a backslash (’\’) character +wherever it is used in the parameter as something other than +a separator. The regular expression defined by this option +is added to the current list of regular expressions for the +specified language unless the parameter is omitted, in which +case the current list is cleared.

+ +

Unless modified +by flags, regexp is interpreted as a Posix +extended regular expression. The replacement should +expand for all matching lines to a non-empty string of +characters, or a warning message will be reported. An +optional kind specifier for tags matching regexp may +follow replacement, which will determine what kind of +tag is reported in the "kind" extension field (see +TAG FILE FORMAT, below). The full form of +kind−spec is in the form of a single letter, a +comma, a name (without spaces), a comma, a description, +followed by a separator, which specify the short and long +forms of the kind value and its textual description +(displayed using −−list−kinds). +Either the kind name and/or the description may be omitted. +If kind−spec is omitted, it defaults to +"r,regex". Finally, flags are one or +more single-letter characters having the following effect +upon the interpretation of regexp:

+ + + + + + + + + + + + + + + + + +
+ + +

b

+ + +

The pattern is +interpreted as a Posix basic regular expression.

+ + +

e

+ + +

The pattern is +interpreted as a Posix extended regular expression +(default).

+ + +

i

+ + +

The regular +expression is to be applied in a case-insensitive +manner.

+ +

Note that this +option is available only if ctags was compiled with +support for regular expressions, which depends upon your +platform. You can determine if support for regular +expressions is compiled in by examining the output of the +−−version option, which will include +"+regex" in the compiled feature list.

+ +

For more +information on the regular expressions used by ctags, +see either the regex(5,7) man page, or the GNU info +documentation for regex (e.g. "info regex").

+ + +

−−sort[=yes|no|foldcase]

+ +

Indicates whether the tag file +should be sorted on the tag name (default is yes). +Note that the original vi(1) required sorted tags. +The foldcase value specifies case insensitive (or +case-folded) sorting. Fast binary searches of tag files +sorted with case-folding will require special support from +tools using tag files, such as that found in the ctags +readtags library, or Vim version 6.2 or higher (using +"set ignorecase"). This option must appear before +the first file name. [Ignored in etags mode]

+ + +

−−tag−relative[=yes|no]

+ +

Indicates that the file paths +recorded in the tag file should be relative to the directory +containing the tag file, rather than relative to the current +directory, unless the files supplied on the command line are +specified with absolute paths. This option must appear +before the first file name. The default is yes when +running in etags mode (see the −e option), +no otherwise.

+ + +

−−totals[=yes|no]

+ +

Prints statistics about the +source files read and the tag file written during the +current invocation of ctags. This option is off by +default. This option must appear before the first file +name.

+ + +

−−verbose[=yes|no]

+ +

Enable verbose mode. This +prints out information on option processing and a brief +message describing what action is being taken for each file +considered by ctags. Normally, ctags does not +read command line arguments until after options are read +from the configuration files (see FILES, below) and +the CTAGS environment variable. However, if this +option is the first argument on the command line, it will +take effect before any options are read from these sources. +The default is no.

+ + +

−−version

+ +

Prints a version identifier for +ctags to standard output, and then exits. This is +guaranteed to always contain the string "Exuberant +Ctags".

+ + +

OPERATIONAL DETAILS

+ + +

As ctags +considers each file name in turn, it tries to determine the +language of the file by applying the following three tests +in order: if the file extension has been mapped to a +language, if the file name matches a shell pattern mapped to +a language, and finally if the file is executable and its +first line specifies an interpreter using the Unix-style +"#!" specification (if supported on the platform). +If a language was identified, the file is opened and then +the appropriate language parser is called to operate on the +currently open file. The parser parses through the file and +adds an entry to the tag file for each language object it is +written to handle. See TAG FILE FORMAT, below, for +details on these entries.

+ +

This +implementation of ctags imposes no formatting +requirements on C code as do legacy implementations. Older +implementations of ctags tended to rely upon certain +formatting assumptions in order to help them resolve coding +dilemmas caused by preprocessor conditionals.

+ +

In general, +ctags tries to be smart about conditional +preprocessor directives. If a preprocessor conditional is +encountered within a statement which defines a tag, +ctags follows only the first branch of that +conditional (except in the special case of "#if +0", in which case it follows only the last branch). The +reason for this is that failing to pursue only one branch +can result in ambiguous syntax, as in the following +example:

+ +

#ifdef +TWO_ALTERNATIVES
+struct {
+#else
+union {
+#endif

+ +

short a;
+long b;

+ +

}

+ +

Both branches +cannot be followed, or braces become unbalanced and +ctags would be unable to make sense of the +syntax.

+ +

If the +application of this heuristic fails to properly parse a +file, generally due to complicated and inconsistent pairing +within the conditionals, ctags will retry the file +using a different heuristic which does not selectively +follow conditional preprocessor branches, but instead falls +back to relying upon a closing brace ("}") in +column 1 as indicating the end of a block once any brace +imbalance results from following a #if conditional +branch.

+ +

Ctags +will also try to specially handle argument lists enclosed +in double sets of parentheses in order to accept the +following conditional construct:

+ +

extern void foo +__ARGS((int one, char two));

+ +

Any name +immediately preceding the "((" will be +automatically ignored and the previous name will be +used.

+ +

C++ operator +definitions are specially handled. In order for consistency +with all types of operators (overloaded and conversion), the +operator name in the tag file will always be preceded by the +string "operator " (i.e. even if the actual +operator definition was written as +"operator<<").

+ +

After creating +or appending to the tag file, it is sorted by the tag name, +removing identical tag lines.

+ + +

TAG FILE FORMAT

+ + +

When not +running in etags mode, each entry in the tag file consists +of a separate line, each looking like this in the most +general case:

+ + +

tag_name<TAB>file_name<TAB>ex_cmd;"<TAB>extension_fields

+ +

The fields and +separators of these lines are specified as follows:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

1.

+ + +

tag name

+ + +

2.

+ + +

single tab character

+ + +

3.

+ + +

name of the file in which the object +associated with the tag is located

+ + +

4.

+ + +

single tab character

+ + +

5.

+ + +

EX command used to locate the tag within +the file; generally a search pattern (either /pattern/ or +?pattern?) or line number (see −−excmd). +Tag file format 2 (see −−format) extends +this EX command under certain circumstances to include a set +of extension fields (described below) embedded in an EX +comment immediately appended to the EX command, which leaves +it backward-compatible with original vi(1) +implementations.

+ +

A few special +tags are written into the tag file for internal purposes. +These tags are composed in such a way that they always sort +to the top of the file. Therefore, the first two characters +of these tags are used as a magic number to detect a tag file +for purposes of determining whether a valid tag file is +being overwritten rather than a source file.

+ +

Note that the +name of each source file will be recorded in the tag file +exactly as it appears on the command line. Therefore, if the +path you specified on the command line was relative to the +current directory, then it will be recorded in that same +manner in the tag file. See, however, the +−−tag−relative option for how this +behavior can be modified.

+ +

Extension +fields are tab-separated key-value pairs appended to the end +of the EX command as a comment, as described above. These +key-value pairs appear in the general form +"key:value". Their presence in the +lines of the tag file is controlled by the +−−fields option. The possible keys and +the meaning of their values are as follows:

+ + + + + + + + + + + + + + + + + +
+ + +

access

+ + +

Indicates the +visibility of this class member, where value is +specific to the language.

+ + +

file

+ + +

Indicates that the tag has file-limited visibility. This +key has no corresponding value.

+ + +

kind

+ + +

Indicates the type, or kind, of tag. Its value is either +one of the corresponding one-letter flags described under +the various −−<LANG>−kinds +options above, or a full name. It is permitted (and is, in +fact, the default) for the key portion of this field to be +omitted. The optional behaviors are controlled with the +−−fields option.

+ +

implementation

+ +

When present, this indicates a +limited implementation (abstract vs. concrete) of a routine +or class, where value is specific to the language +("virtual" or "pure virtual" for C++; +"abstract" for Java).

+ + + + + + + + + + + + +
+ + + +

inherits

+ + +

When present, +value is a comma-separated list of classes from +which this class is derived (i.e. inherits from).

+ + +

signature

+ + +

When present, value is a language-dependent +representation of the signature of a routine. A routine +signature in its complete form specifies the return type of +a routine and its formal argument list. This extension field +is presently supported only for C-based languages and does +not include the return type.

+ +

In addition, +information on the scope of the tag definition may be +available, with the key portion equal to some +language-dependent construct name and its value the name +declared for that construct in the program. This scope entry +indicates the scope in which the tag was found. For example, +a tag generated for a C structure member would have a scope +looking like "struct:myStruct".

+ + +

HOW TO USE WITH VI

+ + +

Vi will, by +default, expect a tag file by the name "tags" in +the current directory. Once the tag file is built, the +following commands exercise the tag indexing feature:

+ + + + + + + + + + + + + + + + + + + + + + +
+ + +

vi −t +tag

+ + +

Start vi and +position the cursor at the file and line where +"tag" is defined.

+ + +

:ta tag

+ + +

Find a tag.

+ + +

Ctrl-]

+ + +

Find the tag under +the cursor.

+ + +

Ctrl-T

+ + +

Return to previous +location before jump to tag (not widely implemented).

+ + +

HOW TO USE WITH GNU EMACS

+ + +

Emacs will, by +default, expect a tag file by the name "TAGS" in +the current directory. Once the tag file is built, the +following commands exercise the tag indexing feature: +
+M-x visit−tags−table <RET> FILE +<RET>

+ +

Select the tag file, +"FILE", to use.

+ +

M-. [TAG] +<RET>

+ +

Find the first definition of +TAG. The default tag is the identifier under the cursor.

+ + + + + + + + + + + + + + +
+ + +

M-*

+ + +

Pop back to where +you previously invoked "M-.".

+
+ + +

C-u M-.

+ + +

Find the next +definition for the last tag.

+
+ +

For more +commands, see the Tags topic in the Emacs info +document.

+ + +

HOW TO USE WITH NEDIT

+ + +

NEdit version +5.1 and later can handle the new extended tag file format +(see −−format). To make NEdit use the tag +file, select "File−>Load Tags File". To +jump to the definition for a tag, highlight the word, then +press Ctrl-D. NEdit 5.1 can read multiple tag files from +different directories. Setting the X resource nedit.tagFile +to the name of a tag file instructs NEdit to automatically +load that tag file at startup time.

+ + +

CAVEATS

+ + +

Because +ctags is neither a preprocessor nor a compiler, use +of preprocessor macros can fool ctags into either +missing tags or improperly generating inappropriate tags. +Although ctags has been designed to handle certain +common cases, this is the single biggest cause of reported +problems. In particular, the use of preprocessor constructs +which alter the textual syntax of C can fool ctags. +You can work around many such problems by using the +−I option.

+ +

Note that since +ctags generates patterns for locating tags (see the +−−excmd option), it is entirely possible +that the wrong line may be found by your editor if there +exists another source line which is identical to the line +containing the tag. The following example demonstrates this +condition:

+ +

int +variable;

+ +

/* ... */
+void foo(variable)
+int variable;
+{

+ +

/* ... */

+ +

}

+ +

Depending upon +which editor you use and where in the code you happen to be, +it is possible that the search pattern may locate the local +parameter declaration in foo() before it finds the actual +global variable definition, since the lines (and therefore +their search patterns) are identical. This can be avoided by +use of the −−excmd=n option.

+ + +

BUGS

+ + +

Ctags +has more options than ls(1).

+ +

When parsing a +C++ member function definition (e.g. +"className::function"), ctags cannot +determine whether the scope specifier is a class name or a +namespace specifier and always lists it as a class name in +the scope portion of the extension fields. Also, if a C++ +function is defined outside of the class declaration (the +usual case), the access specification (i.e. public, +protected, or private) and implementation information (e.g. +virtual, pure virtual) contained in the function declaration +are not known when the tag is generated for the function +definition. It will, however, be available for prototypes +(e.g. −−c++−kinds=+p).

+ +

No qualified +tags are generated for language objects inherited into a +class.

+ + +

ENVIRONMENT VARIABLES

+ + + + + + + + + + + + + + + + + + +
+ + +

CTAGS

+ + +

If this environment +variable exists, it will be expected to contain a set of +default options which are read when ctags starts, +after the configuration files listed in FILES, below, +are read, but before any command line options are read. +Options appearing on the command line will override options +specified in this variable. Only options will be read from +this variable. Note that all white space in this variable is +considered a separator, making it impossible to pass an +option parameter containing an embedded space. If this is a +problem, use a configuration file instead.

+ + +

ETAGS

+ + +

Similar to the CTAGS variable above, this +variable, if found, will be read when etags starts. +If this variable is not found, etags will try to use +CTAGS instead.

+ + +

TMPDIR

+ + +

On Unix-like hosts where mkstemp() is available, the +value of this variable specifies the directory in which to +place temporary files. This can be useful if the size of a +temporary file becomes too large to fit on the partition +holding the default temporary directory defined at +compilation time. ctags creates temporary files only +if either (1) an emacs-style tag file is being generated, +(2) the tag file is being sent to standard output, or (3) +the program was compiled to use an internal sort algorithm +to sort the tag files instead of the sort utility of the +operating system. If the sort utility of the operating +system is being used, it will generally observe this +variable also. Note that if ctags is setuid, the +value of TMPDIR will be ignored.

+ + +

FILES

+ + +

/ctags.cnf +(on MSDOS, MSWindows only)
+/etc/ctags.conf
+/usr/local/etc/ctags.conf
+$HOME/.ctags
+$HOME/ctags.cnf (on MSDOS, MSWindows only)
+.ctags
+ctags.cnf (on MSDOS, MSWindows only)

+ +

If any of these configuration +files exist, each will be expected to contain a set of +default options which are read in the order listed when +ctags starts, but before the CTAGS environment +variable is read or any command line options are read. This +makes it possible to set up site-wide, personal or +project-level defaults. It is possible to compile +ctags to read an additional configuration file before +any of those shown above, which will be indicated if the +output produced by the −−version option +lists the "custom-conf" feature. Options appearing +in the CTAGS environment variable or on the command +line will override options specified in these files. Only +options will be read from these files. Note that the option +files are read in line-oriented mode in which spaces are +significant (since shell quoting is not possible). Each line +of the file is read as one command line parameter (as if it +were quoted with single quotes). Therefore, use new lines to +indicate separate command-line arguments.

+ + + + + + + + + + + + + + +
+ + +

tags

+ + +

The default tag +file created by ctags.

+
+ + +

TAGS

+ + +

The default tag +file created by etags.

+
+ + +

SEE ALSO

+ + +

The official +Exuberant Ctags web site at:

+ + +

http://ctags.sourceforge.net

+ +

Also +ex(1), vi(1), elvis, or, better yet, +vim, the official editor of ctags. For more +information on vim, see the VIM Pages web site +at:

+ + +

http://www.vim.org/

+ + +

AUTHOR

+ + +

Darren Hiebert +<dhiebert at users.sourceforge.net>
+http://DarrenHiebert.com/

+ + +

MOTIVATION

+ + +

"Think ye +at all times of rendering some service to every member of +the human race."

+ +

"All +effort and exertion put forth by man from the fullness of +his heart is worship, if it is prompted by the highest +motives and the will to do service to humanity."

+ +

−− +From the Baha’i Writings

+ + +

CREDITS

+ + +

This version of +ctags was originally derived from and inspired by the +ctags program by Steve Kirkendall +<kirkenda@cs.pdx.edu> that comes with the Elvis vi +clone (though virtually none of the original code +remains).

+ +

Credit is also +due Bram Moolenaar <Bram@vim.org>, the author of +vim, who has devoted so much of his time and energy +both to developing the editor as a service to others, and to +helping the orphans of Uganda.

+ +

The section +entitled "HOW TO USE WITH GNU EMACS" was +shamelessly stolen from the info page for GNU +etags.

+
+ + diff --git a/ctags.spec b/ctags.spec new file mode 100644 index 0000000..88939fe --- /dev/null +++ b/ctags.spec @@ -0,0 +1,40 @@ +Summary: Exuberant Ctags - a multi-language source code indexing tool +Name: ctags +Version: @VERSION@ +Release: 1 +License: GPL +Group: Development/Tools +Source: http://prdownloads.sourceforge.net/ctags/ctags-%{version}.tar.gz +URL: http://ctags.sourceforge.net +Buildroot: %{_tmppath}/%{name}-%{version}-root + +%description +Exuberant Ctags generates an index (or tag) file of language objects +found in source files for many popular programming languages. This index +makes it easy for text editors and other tools to locate the indexed +items. Exuberant Ctags improves on traditional ctags because of its +multilanguage support, its ability for the user to define new languages +searched by regular expressions, and its ability to generate emacs-style +TAGS files. + +Install ctags if you are going to use your system for programming. + +%prep +%setup -q + +%build +%configure +make + +%install +rm -rf $RPM_BUILD_ROOT +%makeinstall + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root) +%doc COPYING EXTENDING.html FAQ NEWS README ctags.html +%{_bindir}/ctags +%{_mandir}/man1/ctags* diff --git a/debug.c b/debug.c new file mode 100644 index 0000000..6d44ad5 --- /dev/null +++ b/debug.c @@ -0,0 +1,113 @@ +/* +* $Id: debug.c 558 2007-06-15 19:17:02Z elliotth $ +* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains debugging functions. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include +#include + +#include "debug.h" +#include "options.h" +#include "read.h" + +/* +* FUNCTION DEFINITIONS +*/ + +#ifdef DEBUG + +extern void lineBreak (void) {} /* provides a line-specified break point */ + +extern void debugPrintf ( + const enum eDebugLevels level, const char *const format, ... ) +{ + va_list ap; + + va_start (ap, format); + if (debug (level)) + vprintf (format, ap); + fflush (stdout); + va_end (ap); +} + +extern void debugPutc (const int level, const int c) +{ + if (debug (level) && c != EOF) + { + if (c == STRING_SYMBOL) printf ("\"string\""); + else if (c == CHAR_SYMBOL) printf ("'c'"); + else putchar (c); + + fflush (stdout); + } +} + +extern void debugParseNest (const boolean increase, const unsigned int level) +{ + debugPrintf (DEBUG_PARSE, "<*%snesting:%d*>", increase ? "++" : "--", level); +} + +extern void debugCppNest (const boolean begin, const unsigned int level) +{ + debugPrintf (DEBUG_CPP, "<*cpp:%s level %d*>", begin ? "begin":"end", level); +} + +extern void debugCppIgnore (const boolean ignore) +{ + debugPrintf (DEBUG_CPP, "<*cpp:%s ignore*>", ignore ? "begin":"end"); +} + +extern void debugEntry (const tagEntryInfo *const tag) +{ + const char *const scope = tag->isFileScope ? "{fs}" : ""; + + if (debug (DEBUG_PARSE)) + { + printf ("<#%s%s:%s", scope, tag->kindName, tag->name); + + if (tag->extensionFields.scope [0] != NULL && + tag->extensionFields.scope [1] != NULL) + printf (" [%s:%s]", tag->extensionFields.scope [0], + tag->extensionFields.scope [1]); + + if (Option.extensionFields.inheritance && + tag->extensionFields.inheritance != NULL) + printf (" [inherits:%s]", tag->extensionFields.inheritance); + + if (Option.extensionFields.fileScope && + tag->isFileScope && ! 
isHeaderFile ()) + printf (" [file:]"); + + if (Option.extensionFields.access && + tag->extensionFields.access != NULL) + printf (" [access:%s]", tag->extensionFields.access); + + if (Option.extensionFields.implementation && + tag->extensionFields.implementation != NULL) + printf (" [imp:%s]", tag->extensionFields.implementation); + + if (Option.extensionFields.typeRef && + tag->extensionFields.typeRef [0] != NULL && + tag->extensionFields.typeRef [1] != NULL) + printf (" [%s:%s]", tag->extensionFields.typeRef [0], + tag->extensionFields.typeRef [1]); + + printf ("#>"); + fflush (stdout); + } +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/debug.h b/debug.h new file mode 100644 index 0000000..41a6881 --- /dev/null +++ b/debug.h @@ -0,0 +1,70 @@ +/* +* $Id: debug.h 558 2007-06-15 19:17:02Z elliotth $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* External interface to debug.c +*/ +#ifndef _DEBUG_H +#define _DEBUG_H + +/* +* Include files +*/ +#include "general.h" /* must always come first */ + +#ifdef DEBUG +# include +#endif +#include "entry.h" + +/* +* Macros +*/ + +#ifdef DEBUG +# define debug(level) ((Option.debugLevel & (long)(level)) != 0) +# define DebugStatement(x) x +# define PrintStatus(x) if (debug(DEBUG_STATUS)) printf x; +# define Assert(c) assert(c) +#else +# define DebugStatement(x) +# define PrintStatus(x) +# define Assert(c) +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +/* +* Data declarations +*/ + +/* Defines the debugging levels. 
+ */ +enum eDebugLevels { + DEBUG_READ = 0x01, /* echo raw (filtered) characters */ + DEBUG_PARSE = 0x02, /* echo parsing results */ + DEBUG_STATUS = 0x04, /* echo file status information */ + DEBUG_OPTION = 0x08, /* echo option parsing */ + DEBUG_CPP = 0x10, /* echo characters out of pre-processor */ + DEBUG_RAW = 0x20 /* echo raw (filtered) characters */ +}; + +/* +* Function prototypes +*/ +extern void lineBreak (void); +extern void debugPrintf (const enum eDebugLevels level, const char *const format, ...) __printf__ (2, 3); +extern void debugPutc (const int level, const int c); +extern void debugParseNest (const boolean increase, const unsigned int level); +extern void debugCppNest (const boolean begin, const unsigned int level); +extern void debugCppIgnore (const boolean ignore); +extern void debugEntry (const tagEntryInfo *const tag); + +#endif /* _DEBUG_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/descrip.mms b/descrip.mms new file mode 100644 index 0000000..4a83e5c --- /dev/null +++ b/descrip.mms @@ -0,0 +1,68 @@ +# $Id: descrip.mms 2 2001-11-02 04:53:43Z darren $ +# +# Makefile for building CTAGS under OpenVMS +# +# Maintained by by Zoltan Arpadffy +# +# Edit the lines in the Configuration section below to select. + +###################################################################### +# Configuration section. +###################################################################### +# Compiler selection. +# Comment out if you use the VAXC compiler +###################################################################### +DECC = YES + +###################################################################### +# Uncomment if want a debug version. Resulting executable is DCTAGS.EXE +###################################################################### +# DEBUG = YES + +###################################################################### +# End of configuration section. +# +# Please, do not change anything below without programming experience. 
+###################################################################### + +CC = cc + +.IFDEF DECC +CC_DEF = $(CC)/decc/prefix=all +.ELSE +CC_DEF = $(CC) +.ENDIF + +LD_DEF = link + +.IFDEF DEBUG +TARGET = dctags.exe +CFLAGS = /debug/noopt/list/cross_reference/include=[] +LDFLAGS = /debug +.ELSE +TARGET = ctags.exe +CFLAGS = /include=[] +LDFLAGS = +.ENDIF + +OBJEXT = obj + +.SUFFIXES : .obj .c + +.INCLUDE source.mak + +EXTRA_OBJS = argproc.obj + +all : $(TARGET) + ! $@ + +.c.obj : + $(CC_DEF) $(CFLAGS) $< + +$(TARGET) : $(OBJECTS) $(EXTRA_OBJS) + $(LD_DEF) $(LDFLAGS) /exe=$(TARGET) $+ + +clean : + -@ if F$SEARCH("*.obj") .NES. "" then delete/noconfirm/nolog *.obj.* + -@ if F$SEARCH("*.exe") .NES. "" then delete/noconfirm/nolog *.exe.* + -@ if F$SEARCH("config.h") .NES. "" then delete/noconfirm/nolog config.h.* diff --git a/dosbatch.c b/dosbatch.c new file mode 100644 index 0000000..c165183 --- /dev/null +++ b/dosbatch.c @@ -0,0 +1,42 @@ +/* +* $Id$ +* +* Copyright (c) 2009, David Fishburn +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for DOS Batch language files. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include +#include "parse.h" + +/* +* FUNCTION DEFINITIONS +*/ + +static void installDosBatchRegex (const langType language) +{ + addTagRegex (language, + "^:([A-Za-z_0-9]+)", "\\1", "l,label,labels", NULL); + addTagRegex (language, + "set[ \t]+([A-Za-z_0-9]+)[ \t]*=", "\\1", "v,variable,variables", NULL); +} + +extern parserDefinition* DosBatchParser () +{ + static const char *const extensions [] = { "bat", "cmd", NULL }; + parserDefinition* const def = parserNew ("DosBatch"); + def->extensions = extensions; + def->initialize = installDosBatchRegex; + def->regex = TRUE; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/e_amiga.h b/e_amiga.h new file mode 100644 index 0000000..511a78f --- /dev/null +++ b/e_amiga.h @@ -0,0 +1,24 @@ +/* +* $Id: e_amiga.h 136 2002-03-08 22:35:19Z darren $ +* +* Copyright (c) 2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Configures ctags for Amiga environment. +*/ +#ifndef E_AMIGA_H +#define E_AMIGA_H + +#define HAVE_STDLIB_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_TIME_H 1 +#define HAVE_CLOCK 1 +#define HAVE_FGETPOS 1 +#define HAVE_STRERROR 1 +#define HAVE_STRICMP 1 +#define HAVE_STRNICMP 1 + +#endif diff --git a/e_djgpp.h b/e_djgpp.h new file mode 100644 index 0000000..5cd5190 --- /dev/null +++ b/e_djgpp.h @@ -0,0 +1,47 @@ +/* +* $Id: e_djgpp.h 375 2003-10-31 04:15:35Z darren $ +* +* Copyright (c) 2002-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Configures ctags for DJGPP environment. 
+*/ +#ifndef E_DJGPP_H +#define E_DJGPP_H + +#define CASE_INSENSITIVE_FILENAMES 1 +#define MSDOS_STYLE_PATH 1 + +#define HAVE_DIR_H 1 +#define HAVE_DIRENT_H 1 +#define HAVE_FCNTL_H 1 +#define HAVE_FNMATCH_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRING_H 1 +#define HAVE_SYS_DIR_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TIMES_H 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_TIME_H 1 +#define HAVE_UNISTD_H 1 + +#define HAVE_CLOCK 1 +#define HAVE_FGETPOS 1 +#define HAVE_FNMATCH 1 +#define HAVE_MKSTEMP 1 +#define HAVE_OPENDIR 1 +#define HAVE_REGCOMP 1 +#define HAVE_REMOVE 1 +#define HAVE_SETENV 1 +#define HAVE_STAT_ST_INO 1 +#define HAVE_STRCASECMP 1 +#define HAVE_STRERROR 1 +#define HAVE_STRNCASECMP 1 +#define HAVE_STRSTR 1 +#define HAVE_TRUNCATE 1 +#define NEED_PROTO_LSTAT 1 +#define STDC_HEADERS 1 + +#endif diff --git a/e_mac.h b/e_mac.h new file mode 100644 index 0000000..053eab0 --- /dev/null +++ b/e_mac.h @@ -0,0 +1,143 @@ +/* +* $Id: e_mac.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2001, Maarten L. Hekkelman +* +* Author: Maarten L. Hekkelman +* http://www.hekkelman.com +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. It is provided on an as-is basis and no +* responsibility is accepted for its failure to perform as expected. +* +* Configures ctags for Macintosh environment. 
+*/ +#ifndef E_MAC_H +#define E_MAC_H + +#define BUILD_MPW_TOOL 1 + +#define MACROS_USE_PATTERNS 1 +#define DEFAULT_FILE_FORMAT 2 +#define INTERNAL_SORT 1 +#define TMPDIR "/tmp" +#define NEED_PROTO_TRUNCATE 1 +#define STDC_HEADERS 1 +#define HAVE_CLOCK 1 +#define HAVE_FGETPOS 1 +#define HAVE_OPENDIR 1 +#define HAVE_REMOVE 1 +#define HAVE_SETENV 1 +#define HAVE_STRERROR 1 +#define HAVE_STRSTR 1 +#define HAVE_FCNTL_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRING_H 1 +#define HAVE_SYS_DIR_H 1 +#define HAVE_SYS_TIMES_H 1 +#define HAVE_TIME_H 1 +#define HAVE_TYPES_H 1 +#define HAVE_STDLIB_H 1 + +#include +#include +#include +#include +#include + +#include + +#if BUILD_MPW_TOOL + +/* + The following defines are collected from various header files from some + Linux distribution +*/ + +typedef unsigned long mode_t; +typedef unsigned long ino_t; +typedef unsigned long dev_t; +typedef short nlink_t; +typedef unsigned long uid_t; +typedef unsigned long gid_t; + +/* Encoding of the file mode. */ +#define S_IFMT 0170000 /* These bits determine file type. */ + +/* File types. */ +#define S_IFDIR 0040000 /* Directory. */ +#define S_IFCHR 0020000 /* Character device. */ +#define S_IFBLK 0060000 /* Block device. */ +#define S_IFREG 0100000 /* Regular file. */ + +#define S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask)) + +#define S_ISDIR(mode) S_ISTYPE((mode), S_IFDIR) +#define S_ISCHR(mode) S_ISTYPE((mode), S_IFCHR) +#define S_ISBLK(mode) S_ISTYPE((mode), S_IFBLK) +#define S_ISREG(mode) S_ISTYPE((mode), S_IFREG) + +struct stat { + dev_t st_dev; /* Device. */ + unsigned short int __pad1; + ino_t st_ino; /* File serial number. */ + mode_t st_mode; /* File mode. */ + nlink_t st_nlink; /* Link count. */ + uid_t st_uid; /* User ID of the file's owner. */ + gid_t st_gid; /* Group ID of the file's group.*/ + off_t st_size; /* Size of file, in bytes. */ + unsigned long int st_blksize; /* Optimal block size for I/O. */ + long st_blocks; /* Number 512-byte blocks allocated. 
*/ + time_t st_atime; /* Time of last access. */ + time_t st_mtime; /* Time of last modification. */ + time_t st_ctime; /* Time of last status change. */ +}; + +int fstat(int fildes, struct stat *buf); + +#else +#include +#include +#endif + +#ifndef PATH_MAX +#define PATH_MAX 1024 +#endif + +/* + Our own stat, accepts unix like paths. +*/ +int mstat(const char *path, struct stat *buf); + +struct dirent { + char d_name[64]; +}; + +typedef struct { + FSSpec file; + int index; + struct dirent ent; +} DIR; + +extern DIR* opendir(const char *dirname); +extern struct dirent* readdir(DIR* dirp); +extern int closedir(DIR* dirp); +extern void rewinddir(DIR* dirp); +extern char* getcwd(char*, int); + +/* + Our own fopen, accepts unix like paths. +*/ +extern FILE* mfopen(const char* file, const char* mode); + +/* + Dirty, define the standard functions fopen, stat and lstat to map to our + own routines. +*/ +#define fopen mfopen +#define stat(a,b) mstat(a,b) +#define lstat(a,b) mstat(a,b) + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/e_msoft.h b/e_msoft.h new file mode 100644 index 0000000..cc40015 --- /dev/null +++ b/e_msoft.h @@ -0,0 +1,76 @@ +/* +* $Id: e_msoft.h 577 2007-06-30 15:30:16Z dhiebert $ +* +* Copyright (c) 2002-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Configures ctags for Microsoft environment. +*/ +#ifndef E_MSOFT_H +#define E_MSOFT_H + +/* MS-DOS/Windows doesn't allow manipulation of standard error, + * so we send it to stdout instead. 
+ */ +#define errout stdout + +#define CASE_INSENSITIVE_FILENAMES 1 +#define MANUAL_GLOBBING 1 +#define MSDOS_STYLE_PATH 1 +#define HAVE_DOS_H 1 +#define HAVE_FCNTL_H 1 +#define HAVE_IO_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_TIME_H 1 +#define HAVE_CLOCK 1 +#define HAVE_CHSIZE 1 +#define HAVE_FGETPOS 1 +#define HAVE_STRICMP 1 +#define HAVE_STRNICMP 1 +#define HAVE_STRSTR 1 +#define HAVE_STRERROR 1 +#define HAVE_FINDNEXT 1 +#define HAVE_TEMPNAM 1 +#define tempnam(dir,pfx) _tempnam(dir,pfx) +#define TMPDIR "\\" + +#ifdef __BORLANDC__ + +# define HAVE_DIR_H 1 +# define HAVE_DIRENT_H 1 +# define HAVE_FINDFIRST 1 + +#elif defined (_MSC_VER) + +# define HAVE__FINDFIRST 1 +# define HAVE_DIRECT_H 1 + +# if _MSC_VER >= 1300 +# define findfirst_t intptr_t /* Visual Studio 7 */ +# else +# define findfirst_t long /* Visual Studio 6 or earlier */ +# endif + +#elif defined (__MINGW32__) + +# include <_mingw.h> +# if defined (__MSVCRT__) && __MINGW32_MAJOR_VERSION == 1 && __MINGW32_MINOR_VERSION < 2 +/* Work-around for broken implementation of fgetpos()/fsetpos() on Mingw32 */ +# undef HAVE_FGETPOS +# define NEED_PROTO_FGETPOS 1 +# endif +# define HAVE_DIR_H 1 +# define HAVE_DIRENT_H 1 +# define HAVE__FINDFIRST 1 +# define findfirst_t long +# define ffblk _finddata_t +# define FA_DIREC _A_SUBDIR +# define ff_name name + +#endif + +#endif diff --git a/e_os2.h b/e_os2.h new file mode 100644 index 0000000..53b5f19 --- /dev/null +++ b/e_os2.h @@ -0,0 +1,37 @@ +/* +* $Id: e_os2.h 136 2002-03-08 22:35:19Z darren $ +* +* Copyright (c) 2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Configures ctags for OS/2 environment. 
+*/ +#ifndef E_OS2_H +#define E_OS2_H + +#define UNIX_PATH_SEPARATOR 1 +#define CASE_INSENSITIVE_FILENAMES 1 +#define HAVE_DIRENT_H 1 +#define HAVE_FCNTL_H 1 +#define HAVE_IO_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_TIME_H 1 +#define HAVE_UNISTD_H 1 +#define HAVE_CLOCK 1 +#define HAVE_CHSIZE 1 +#define HAVE_FGETPOS 1 +#define HAVE_FTRUNCATE 1 +#define HAVE_OPENDIR 1 +#define HAVE_REGCOMP 1 +#define HAVE_REMOVE 1 +#define HAVE_STRERROR 1 +#define HAVE_STRICMP 1 +#define HAVE_STRNICMP 1 +#define HAVE_STRSTR 1 +#define HAVE_TRUNCATE 1 + +#endif diff --git a/e_qdos.h b/e_qdos.h new file mode 100644 index 0000000..52f2500 --- /dev/null +++ b/e_qdos.h @@ -0,0 +1,34 @@ +/* +* $Id: e_qdos.h 136 2002-03-08 22:35:19Z darren $ +* +* Copyright (c) 2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Configures ctags for QDOS environment. +*/ +#ifndef E_QDOS_H +#define E_QDOS_H + +#define HAVE_DIRENT_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TIMES_H 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_TIME_H 1 +#define HAVE_UNISTD_H 1 +#define STDC_HEADERS 1 +#define HAVE_CLOCK 1 +#define HAVE_FGETPOS 1 +#define HAVE_FTRUNCATE 1 +#define HAVE_OPENDIR 1 +#define HAVE_PUTENV 1 +#define HAVE_REMOVE 1 +#define HAVE_STRERROR 1 +#define HAVE_STRSTR 1 +#define HAVE_TIMES 1 +#define HAVE_TRUNCATE 1 +#define NON_CONST_PUTENV_PROTOTYPE 1 + +#endif diff --git a/e_riscos.h b/e_riscos.h new file mode 100644 index 0000000..a7a3ecc --- /dev/null +++ b/e_riscos.h @@ -0,0 +1,58 @@ +/* +* $Id: e_riscos.h 136 2002-03-08 22:35:19Z darren $ +* +* Copyright (c) 2002, Andrew Wingate +* +* Author: Andrew Wingate +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. It is provided on an as-is basis and no +* responsibility is accepted for its failure to perform as expected. 
+* +* Configures ctags for RISC OS environment. +* +* We currently use UnixLib. This file is subject to change if +* we move to using SharedCLibrary and libGNU. +*/ +#ifndef E_RISCOS_H +#define E_RISCOS_H + +#define MACROS_USE_PATTERNS 1 +#define DEFAULT_FILE_FORMAT 2 +#define INTERNAL_SORT 1 /* Not all systems will have installed sort(1) */ +#define TMPDIR "" + +/* Various definitions for UnixLib */ +#define STDC_HEADERS 1 +#define HAVE_CHMOD 1 +#define HAVE_CHSIZE 1 +#define HAVE_CLOCK 1 +#define HAVE_FGETPOS 1 +#define HAVE_FNMATCH 1 +#define HAVE_FTRUNCATE 1 +#define HAVE_MKSTEMP 1 +#define HAVE_OPENDIR 1 +#define HAVE_PUTENV 1 +#define HAVE_REGCOMP 1 /* Requires RegEx library */ +#define HAVE_REMOVE 1 +#define HAVE_SETENV 1 +#define HAVE_STRERROR 1 +#define HAVE_STRICMP 1 +#define HAVE_STRNICMP 1 +#define HAVE_STRSTR 1 +#define HAVE_TIMES 1 +#define HAVE_TRUNCATE 1 +#define HAVE_DIRENT_H 1 +#define HAVE_FCNTL_H 1 +#define HAVE_FNMATCH_H 1 +#define HAVE_STAT_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRING_H 1 +#define HAVE_SYS_DIR_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_SYS_TIMES_H 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_TIME_H 1 +#define HAVE_UNISTD_H 1 + +#endif diff --git a/e_vms.h b/e_vms.h new file mode 100644 index 0000000..b5cfa36 --- /dev/null +++ b/e_vms.h @@ -0,0 +1,31 @@ +/* +* $Id: e_vms.h 136 2002-03-08 22:35:19Z darren $ +* +* Copyright (c) 2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Configures ctags for VMS environment. 
+*/ +#ifndef E_VMS_H +#define E_VMS_H + +#define CASE_INSENSITIVE_FILENAMES 1 +#define HAVE_STDLIB_H 1 +#define HAVE_TIME_H 1 +#ifdef VAXC +# define HAVE_STAT_H 1 +# define HAVE_TYPES_H 1 +#else +# define HAVE_FCNTL_H 1 +# define HAVE_SYS_STAT_H 1 +# define HAVE_SYS_TYPES_H 1 +#endif +#define HAVE_CLOCK 1 +#define HAVE_FGETPOS 1 +#define HAVE_STRERROR 1 +#define HAVE_STRSTR 1 +#define HAVE_UNISTD_H 1 + +#endif diff --git a/eiffel.c b/eiffel.c new file mode 100644 index 0000000..b504ac3 --- /dev/null +++ b/eiffel.c @@ -0,0 +1,1352 @@ +/* +* $Id: eiffel.c 706 2009-06-28 23:09:30Z dhiebert $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Eiffel language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#ifdef TYPE_REFERENCE_TOOL +#include +#endif +#include +#include +#include /* to define tolower () */ +#include + +#include "debug.h" +#include "keyword.h" +#include "routines.h" +#include "vstring.h" +#ifndef TYPE_REFERENCE_TOOL +#include "entry.h" +#include "options.h" +#include "parse.h" +#include "read.h" +#endif + +/* +* MACROS +*/ +#define isident(c) (isalnum(c) || (c) == '_') +#define isFreeOperatorChar(c) ((c) == '@' || (c) == '#' || \ + (c) == '|' || (c) == '&') +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* +* DATA DECLARATIONS +*/ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +/* Used to specify type of keyword. 
+ */
+typedef enum eKeywordId {
+	KEYWORD_NONE = -1,	/* "not a keyword": newToken () and readToken () reset to this; readToken () turns such words into TOKEN_IDENTIFIER */
+	KEYWORD_alias, KEYWORD_all, KEYWORD_and, KEYWORD_as, KEYWORD_assign,	/* real keywords follow; each is paired with its spelling in EiffelKeywordTable */
+	KEYWORD_check, KEYWORD_class, KEYWORD_convert, KEYWORD_create,
+	KEYWORD_creation, KEYWORD_Current,
+	KEYWORD_debug, KEYWORD_deferred, KEYWORD_do, KEYWORD_else,
+	KEYWORD_elseif, KEYWORD_end, KEYWORD_ensure, KEYWORD_expanded,
+	KEYWORD_export, KEYWORD_external, KEYWORD_false, KEYWORD_feature,
+	KEYWORD_from, KEYWORD_frozen, KEYWORD_if, KEYWORD_implies,
+	KEYWORD_indexing, KEYWORD_infix, KEYWORD_inherit, KEYWORD_inspect,
+	KEYWORD_invariant, KEYWORD_is, KEYWORD_like, KEYWORD_local,
+	KEYWORD_loop, KEYWORD_not, KEYWORD_obsolete, KEYWORD_old, KEYWORD_once,
+	KEYWORD_or, KEYWORD_prefix, KEYWORD_redefine, KEYWORD_rename,
+	KEYWORD_require, KEYWORD_rescue, KEYWORD_Result, KEYWORD_retry,
+	KEYWORD_select, KEYWORD_separate, KEYWORD_strip, KEYWORD_then,
+	KEYWORD_true, KEYWORD_undefine, KEYWORD_unique, KEYWORD_until,
+	KEYWORD_variant, KEYWORD_when, KEYWORD_xor
+} keywordId;
+
+/*  Used to determine whether keyword is valid for the token language and
+ *  what its ID is.
+ */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_BANG, + TOKEN_CHARACTER, + TOKEN_CLOSE_BRACE, + TOKEN_CLOSE_BRACKET, + TOKEN_CLOSE_PAREN, + TOKEN_COLON, + TOKEN_COMMA, + TOKEN_CONSTRAINT, + TOKEN_DOT, + TOKEN_DOLLAR, + TOKEN_IDENTIFIER, + TOKEN_KEYWORD, + TOKEN_NUMERIC, + TOKEN_OPEN_BRACE, + TOKEN_OPEN_BRACKET, + TOKEN_OPEN_PAREN, + TOKEN_OPERATOR, + TOKEN_OTHER, + TOKEN_QUESTION, + TOKEN_SEMICOLON, + TOKEN_SEPARATOR, + TOKEN_STRING, + TOKEN_TILDE +} tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + boolean isExported; + vString* string; + vString* className; + vString* featureName; +} tokenInfo; + +/* +* DATA DEFINITIONS +*/ + +static langType Lang_eiffel; + +#ifdef TYPE_REFERENCE_TOOL + +static const char *FileName; +static FILE *File; +static int PrintClass; +static int PrintReferences; +static int SelfReferences; +static int Debug; +static stringList *GenericNames; +static stringList *ReferencedTypes; + +#else + +typedef enum { + EKIND_CLASS, EKIND_FEATURE, EKIND_LOCAL, EKIND_QUALIFIED_TAGS +} eiffelKind; + +static kindOption EiffelKinds [] = { + { TRUE, 'c', "class", "classes"}, + { TRUE, 'f', "feature", "features"}, + { FALSE, 'l', "local", "local entities"} +}; + +#endif + +static jmp_buf Exception; + +static const keywordDesc EiffelKeywordTable [] = { + /* keyword keyword ID */ + { "alias", KEYWORD_alias }, + { "all", KEYWORD_all }, + { "and", KEYWORD_and }, + { "as", KEYWORD_as }, + { "assign", KEYWORD_assign }, + { "check", KEYWORD_check }, + { "class", KEYWORD_class }, + { "convert", KEYWORD_convert }, + { "create", KEYWORD_create }, + { "creation", KEYWORD_creation }, + { "current", KEYWORD_Current }, + { "debug", KEYWORD_debug }, + { "deferred", KEYWORD_deferred }, + { "do", KEYWORD_do }, + { "else", KEYWORD_else }, + { "elseif", KEYWORD_elseif }, + { "end", KEYWORD_end }, + { "ensure", KEYWORD_ensure }, + { "expanded", 
KEYWORD_expanded }, + { "export", KEYWORD_export }, + { "external", KEYWORD_external }, + { "false", KEYWORD_false }, + { "feature", KEYWORD_feature }, + { "from", KEYWORD_from }, + { "frozen", KEYWORD_frozen }, + { "if", KEYWORD_if }, + { "implies", KEYWORD_implies }, + { "indexing", KEYWORD_indexing }, + { "infix", KEYWORD_infix }, + { "inherit", KEYWORD_inherit }, + { "inspect", KEYWORD_inspect }, + { "invariant", KEYWORD_invariant }, + { "is", KEYWORD_is }, + { "like", KEYWORD_like }, + { "local", KEYWORD_local }, + { "loop", KEYWORD_loop }, + { "not", KEYWORD_not }, + { "obsolete", KEYWORD_obsolete }, + { "old", KEYWORD_old }, + { "once", KEYWORD_once }, + { "or", KEYWORD_or }, + { "prefix", KEYWORD_prefix }, + { "redefine", KEYWORD_redefine }, + { "rename", KEYWORD_rename }, + { "require", KEYWORD_require }, + { "rescue", KEYWORD_rescue }, + { "result", KEYWORD_Result }, + { "retry", KEYWORD_retry }, + { "select", KEYWORD_select }, + { "separate", KEYWORD_separate }, + { "strip", KEYWORD_strip }, + { "then", KEYWORD_then }, + { "true", KEYWORD_true }, + { "undefine", KEYWORD_undefine }, + { "unique", KEYWORD_unique }, + { "until", KEYWORD_until }, + { "variant", KEYWORD_variant }, + { "when", KEYWORD_when }, + { "xor", KEYWORD_xor } +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static void buildEiffelKeywordHash (void) +{ + const size_t count = sizeof (EiffelKeywordTable) / + sizeof (EiffelKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &EiffelKeywordTable [i]; + addKeyword (p->name, Lang_eiffel, (int) p->id); + } +} + +#ifdef TYPE_REFERENCE_TOOL + +static void addGenericName (tokenInfo *const token) +{ + vStringUpper (token->string); + if (vStringLength (token->string) > 0) + stringListAdd (GenericNames, vStringNewCopy (token->string)); +} + +static boolean isGeneric (tokenInfo *const token) +{ + return (boolean) stringListHas (GenericNames, vStringValue (token->string)); +} + +static void reportType (tokenInfo 
*const token) +{ + vStringUpper (token->string); + if (vStringLength (token->string) > 0 && ! isGeneric (token) && + (SelfReferences || strcmp (vStringValue ( + token->string), vStringValue (token->className)) != 0) && + ! stringListHas (ReferencedTypes, vStringValue (token->string))) + { + printf ("%s\n", vStringValue (token->string)); + stringListAdd (ReferencedTypes, vStringNewCopy (token->string)); + } +} + +static int fileGetc (void) +{ + int c = getc (File); + if (c == '\r') + { + c = getc (File); + if (c != '\n') + { + ungetc (c, File); + c = '\n'; + } + } + if (Debug > 0 && c != EOF) + putc (c, errout); + return c; +} + +static int fileUngetc (c) +{ + return ungetc (c, File); +} + +extern char *readLine (vString *const vLine, FILE *const fp) +{ + return NULL; +} + +#else + +/* +* Tag generation functions +*/ + +static void makeEiffelClassTag (tokenInfo *const token) +{ + if (EiffelKinds [EKIND_CLASS].enabled) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + + initTagEntry (&e, name); + + e.kindName = EiffelKinds [EKIND_CLASS].name; + e.kind = EiffelKinds [EKIND_CLASS].letter; + + makeTagEntry (&e); + } + vStringCopy (token->className, token->string); +} + +static void makeEiffelFeatureTag (tokenInfo *const token) +{ + if (EiffelKinds [EKIND_FEATURE].enabled && + (token->isExported || Option.include.fileScope)) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + + initTagEntry (&e, name); + + e.isFileScope = (boolean) (! 
token->isExported); + e.kindName = EiffelKinds [EKIND_FEATURE].name; + e.kind = EiffelKinds [EKIND_FEATURE].letter; + e.extensionFields.scope [0] = EiffelKinds [EKIND_CLASS].name; + e.extensionFields.scope [1] = vStringValue (token->className); + + makeTagEntry (&e); + + if (Option.include.qualifiedTags) + { + vString* qualified = vStringNewInit (vStringValue (token->className)); + vStringPut (qualified, '.'); + vStringCat (qualified, token->string); + e.name = vStringValue (qualified); + makeTagEntry (&e); + vStringDelete (qualified); + } + } + vStringCopy (token->featureName, token->string); +} + +static void makeEiffelLocalTag (tokenInfo *const token) +{ + if (EiffelKinds [EKIND_LOCAL].enabled && Option.include.fileScope) + { + const char *const name = vStringValue (token->string); + vString* scope = vStringNew (); + tagEntryInfo e; + + initTagEntry (&e, name); + + e.isFileScope = TRUE; + e.kindName = EiffelKinds [EKIND_LOCAL].name; + e.kind = EiffelKinds [EKIND_LOCAL].letter; + + vStringCopy (scope, token->className); + vStringPut (scope, '.'); + vStringCat (scope, token->featureName); + + e.extensionFields.scope [0] = EiffelKinds [EKIND_FEATURE].name; + e.extensionFields.scope [1] = vStringValue (scope); + + makeTagEntry (&e); + vStringDelete (scope); + } +} + +#endif + +/* +* Parsing functions +*/ + +static int skipToCharacter (const int c) +{ + int d; + + do + { + d = fileGetc (); + } while (d != EOF && d != c); + + return d; +} + +/* If a numeric is passed in 'c', this is used as the first digit of the + * numeric being parsed. + */ +static vString *parseInteger (int c) +{ + vString *string = vStringNew (); + + if (c == '\0') + c = fileGetc (); + if (c == '-') + { + vStringPut (string, c); + c = fileGetc (); + } + else if (! 
isdigit (c)) + c = fileGetc (); + while (c != EOF && (isdigit (c) || c == '_')) + { + vStringPut (string, c); + c = fileGetc (); + } + vStringTerminate (string); + fileUngetc (c); + + return string; +} + +static vString *parseNumeric (int c) +{ + vString *string = vStringNew (); + vString *integer = parseInteger (c); + vStringCopy (string, integer); + vStringDelete (integer); + + c = fileGetc (); + if (c == '.') + { + integer = parseInteger ('\0'); + vStringPut (string, c); + vStringCat (string, integer); + vStringDelete (integer); + c = fileGetc (); + } + if (tolower (c) == 'e') + { + integer = parseInteger ('\0'); + vStringPut (string, c); + vStringCat (string, integer); + vStringDelete (integer); + } + else if (!isspace (c)) + fileUngetc (c); + + vStringTerminate (string); + + return string; +} + +static int parseEscapedCharacter (void) +{ + int d = '\0'; + int c = fileGetc (); + + switch (c) + { + case 'A': d = '@'; break; + case 'B': d = '\b'; break; + case 'C': d = '^'; break; + case 'D': d = '$'; break; + case 'F': d = '\f'; break; + case 'H': d = '\\'; break; + case 'L': d = '~'; break; + case 'N': d = '\n'; break; +#ifdef QDOS + case 'Q': d = 0x9F; break; +#else + case 'Q': d = '`'; break; +#endif + case 'R': d = '\r'; break; + case 'S': d = '#'; break; + case 'T': d = '\t'; break; + case 'U': d = '\0'; break; + case 'V': d = '|'; break; + case '%': d = '%'; break; + case '\'': d = '\''; break; + case '"': d = '"'; break; + case '(': d = '['; break; + case ')': d = ']'; break; + case '<': d = '{'; break; + case '>': d = '}'; break; + + case '\n': skipToCharacter ('%'); break; + + case '/': + { + vString *string = parseInteger ('\0'); + const char *value = vStringValue (string); + const unsigned long ascii = atol (value); + vStringDelete (string); + + c = fileGetc (); + if (c == '/' && ascii < 256) + d = ascii; + break; + } + + default: break; + } + return d; +} + +static int parseCharacter (void) +{ + int c = fileGetc (); + int result = c; + + if (c == 
'%') + result = parseEscapedCharacter (); + + c = fileGetc (); + if (c != '\'') + skipToCharacter ('\n'); + + return result; +} + +static void parseString (vString *const string) +{ + boolean verbatim = FALSE; + boolean align = FALSE; + boolean end = FALSE; + vString *verbatimCloser = vStringNew (); + vString *lastLine = vStringNew (); + int prev = '\0'; + int c; + + while (! end) + { + c = fileGetc (); + if (c == EOF) + end = TRUE; + else if (c == '"') + { + if (! verbatim) + end = TRUE; + else + end = (boolean) (strcmp (vStringValue (lastLine), + vStringValue (verbatimCloser)) == 0); + } + else if (c == '\n') + { + if (verbatim) + vStringClear (lastLine); + if (prev == '[' /* || prev == '{' */) + { + verbatim = TRUE; + vStringClear (verbatimCloser); + vStringClear (lastLine); + if (prev == '{') + vStringPut (verbatimCloser, '}'); + else + { + vStringPut (verbatimCloser, ']'); + align = TRUE; + } + vStringNCat (verbatimCloser, string, vStringLength (string) - 1); + vStringClear (string); + } + if (verbatim && align) + { + do + c = fileGetc (); + while (isspace (c)); + } + } + else if (c == '%') + c = parseEscapedCharacter (); + if (! end) + { + vStringPut (string, c); + if (verbatim) + { + vStringPut (lastLine, c); + vStringTerminate (lastLine); + } + prev = c; + } + } + vStringTerminate (string); + vStringDelete (lastLine); + vStringDelete (verbatimCloser); +} + +/* Read a C identifier beginning with "firstChar" and places it into "name". 
+ */ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isident (c)); + + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void parseFreeOperator (vString *const string, const int firstChar) +{ + int c = firstChar; + + do + { + vStringPut (string, c); + c = fileGetc (); + } while (c > ' '); + + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void copyToken (tokenInfo* dst, const tokenInfo *src) +{ + dst->type = src->type; + dst->keyword = src->keyword; + dst->isExported = src->isExported; + + vStringCopy (dst->string, src->string); + vStringCopy (dst->className, src->className); + vStringCopy (dst->featureName, src->featureName); +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->isExported = TRUE; + + token->string = vStringNew (); + token->className = vStringNew (); + token->featureName = vStringNew (); + + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + vStringDelete (token->className); + vStringDelete (token->featureName); + + eFree (token); +} + +static void readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + + do + c = fileGetc (); + while (c == '\t' || c == ' ' || c == '\n'); + + switch (c) + { + case EOF: longjmp (Exception, (int)ExceptionEOF); break; + case ';': token->type = TOKEN_SEMICOLON; break; + case '!': token->type = TOKEN_BANG; break; + case '}': token->type = TOKEN_CLOSE_BRACE; break; + case ']': token->type = TOKEN_CLOSE_BRACKET; break; + case ')': token->type = TOKEN_CLOSE_PAREN; break; + case ',': token->type = 
TOKEN_COMMA; break; + case '$': token->type = TOKEN_DOLLAR; break; + case '.': token->type = TOKEN_DOT; break; + case '{': token->type = TOKEN_OPEN_BRACE; break; + case '[': token->type = TOKEN_OPEN_BRACKET; break; + case '(': token->type = TOKEN_OPEN_PAREN; break; + case '~': token->type = TOKEN_TILDE; break; + + + case '+': + case '*': + case '^': + case '=': token->type = TOKEN_OPERATOR; break; + + case '-': + c = fileGetc (); + if (c == '>') + token->type = TOKEN_CONSTRAINT; + else if (c == '-') /* is this the start of a comment? */ + { + skipToCharacter ('\n'); + goto getNextChar; + } + else + { + if (!isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + } + break; + + case '?': + case ':': + { + int c2 = fileGetc (); + if (c2 == '=') + token->type = TOKEN_OPERATOR; + else + { + if (!isspace (c2)) + fileUngetc (c2); + if (c == ':') + token->type = TOKEN_COLON; + else + token->type = TOKEN_QUESTION; + } + break; + } + + case '<': + c = fileGetc (); + if (c != '=' && c != '>' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + break; + + case '>': + c = fileGetc (); + if (c != '=' && c != '>' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + break; + + case '/': + c = fileGetc (); + if (c != '/' && c != '=' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + break; + + case '\\': + c = fileGetc (); + if (c != '\\' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + break; + + case '"': + token->type = TOKEN_STRING; + parseString (token->string); + break; + + case '\'': + token->type = TOKEN_CHARACTER; + parseCharacter (); + break; + + default: + if (isalpha (c)) + { + parseIdentifier (token->string, c); + token->keyword = analyzeToken (token->string, Lang_eiffel); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + else if (isdigit (c)) + { + vString* numeric = parseNumeric (c); + vStringCat (token->string, numeric); 
+ vStringDelete (numeric); + token->type = TOKEN_NUMERIC; + } + else if (isFreeOperatorChar (c)) + { + parseFreeOperator (token->string, c); + token->type = TOKEN_OPERATOR; + } + else + { + token->type = TOKEN_UNDEFINED; + Assert (! isType (token, TOKEN_UNDEFINED)); + } + break; + } +} + +/* +* Scanning functions +*/ + +static boolean isIdentifierMatch ( + const tokenInfo *const token, const char *const name) +{ + return (boolean) (isType (token, TOKEN_IDENTIFIER) && + strcasecmp (vStringValue (token->string), name) == 0); +} + +static void findToken (tokenInfo *const token, const tokenType type) +{ + while (! isType (token, type)) + readToken (token); +} + +static void findKeyword (tokenInfo *const token, const keywordId keyword) +{ + while (! isKeyword (token, keyword)) + readToken (token); +} + +static boolean parseType (tokenInfo *const token); + +static void parseGeneric (tokenInfo *const token, boolean declaration __unused__) +{ + unsigned int depth = 0; +#ifdef TYPE_REFERENCE_TOOL + boolean constraint = FALSE; +#endif + Assert (isType (token, TOKEN_OPEN_BRACKET)); + do + { + if (isType (token, TOKEN_OPEN_BRACKET)) + { + ++depth; + readToken (token); + } + else if (isType (token, TOKEN_CLOSE_BRACKET)) + { + --depth; + readToken (token); + } +#ifdef TYPE_REFERENCE_TOOL + else if (declaration) + { + boolean advanced = FALSE; + if (depth == 1) + { + if (isType (token, TOKEN_CONSTRAINT)) + constraint = TRUE; + else if (isKeyword (token, KEYWORD_create)) + findKeyword (token, KEYWORD_end); + else if (isType (token, TOKEN_IDENTIFIER)) + { + if (constraint) + advanced = parseType (token); + else + addGenericName (token); + constraint = FALSE; + } + } + else if (isType (token, TOKEN_IDENTIFIER)) + advanced = parseType (token); + if (! 
advanced) + readToken (token); + } +#endif + else + parseType (token); + } while (depth > 0); +} + +static boolean parseType (tokenInfo *const token) +{ + tokenInfo* const id = newToken (); + copyToken (id, token); + readToken (token); + if (isType (token, TOKEN_COLON)) /* check for "{entity: TYPE}" */ + { + readToken (id); + readToken (token); + } + if (isKeyword (id, KEYWORD_like)) + { + if (isType (token, TOKEN_IDENTIFIER) || + isKeyword (token, KEYWORD_Current)) + readToken (token); + } + else + { + if (isKeyword (id, KEYWORD_expanded)) + { + copyToken (id, token); + readToken (token); + } + if (isType (id, TOKEN_IDENTIFIER)) + { +#ifdef TYPE_REFERENCE_TOOL + reportType (id); +#endif + if (isType (token, TOKEN_OPEN_BRACKET)) + parseGeneric (token, FALSE); + else if ((strcmp ("BIT", vStringValue (id->string)) == 0)) + readToken (token); /* read token after number of bits */ + } + } + deleteToken (id); + return TRUE; +} + +static void parseEntityType (tokenInfo *const token) +{ + Assert (isType (token, TOKEN_COLON)); + readToken (token); + + if (isType (token, TOKEN_BANG) || isType (token, TOKEN_QUESTION)) + readToken (token); /* skip over '!' or '?' */ + parseType (token); +} + +static void parseLocal (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_local)); + readToken (token); + + /* Check keyword first in case local clause is empty + */ + while (! isKeyword (token, KEYWORD_do) && + ! 
isKeyword (token, KEYWORD_once)) + { +#ifndef TYPE_REFERENCE_TOOL + if (isType (token, TOKEN_IDENTIFIER)) + makeEiffelLocalTag (token); +#endif + readToken (token); + if (isType (token, TOKEN_COLON)) + parseEntityType (token); + } +} + +static void findFeatureEnd (tokenInfo *const token) +{ + boolean isFound = isKeyword (token, KEYWORD_is); + if (isFound) + readToken (token); + switch (token->keyword) + { + case KEYWORD_deferred: + case KEYWORD_do: + case KEYWORD_external: + case KEYWORD_local: + case KEYWORD_obsolete: + case KEYWORD_once: + case KEYWORD_require: + { + int depth = 1; + + while (depth > 0) + { +#ifdef TYPE_REFERENCE_TOOL + if (isType (token, TOKEN_OPEN_BRACE)) + { + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + parseType (token); + } + else if (isType (token, TOKEN_BANG)) + { + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + parseType (token); + if (isType (token, TOKEN_BANG)) + readToken (token); + } + else +#endif + switch (token->keyword) + { + case KEYWORD_check: + case KEYWORD_debug: + case KEYWORD_from: + case KEYWORD_if: + case KEYWORD_inspect: + ++depth; + break; + + case KEYWORD_local: + parseLocal (token); + break; + + case KEYWORD_end: + --depth; + break; + + default: + break; + } + readToken (token); + } + break; + } + + default: + /* is this a manifest constant? 
*/ + if (isFound || isType (token, TOKEN_OPERATOR)) { + if (isType (token, TOKEN_OPERATOR)) + readToken (token); + readToken (token); + } + break; + } +} + +static boolean readFeatureName (tokenInfo *const token) +{ + boolean isFeatureName = FALSE; + + if (isKeyword (token, KEYWORD_frozen)) + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + isFeatureName = TRUE; + else if (isKeyword (token, KEYWORD_assign)) /* legacy code */ + isFeatureName = TRUE; + else if (isKeyword (token, KEYWORD_infix) || + isKeyword (token, KEYWORD_prefix)) + { + readToken (token); + if (isType (token, TOKEN_STRING)) + isFeatureName = TRUE; + } + return isFeatureName; +} + +static void parseArguments (tokenInfo *const token) +{ +#ifndef TYPE_REFERENCE_TOOL + findToken (token, TOKEN_CLOSE_PAREN); + readToken (token); +#else + Assert (isType (token, TOKEN_OPEN_PAREN)); + readToken (token); + do + { + if (isType (token, TOKEN_COLON)) + parseEntityType (token); + else + readToken (token); + } while (! isType (token, TOKEN_CLOSE_PAREN)); + readToken (token); +#endif +} + +static boolean parseFeature (tokenInfo *const token) +{ + boolean found = FALSE; + while (readFeatureName (token)) + { + found = TRUE; +#ifndef TYPE_REFERENCE_TOOL + makeEiffelFeatureTag (token); +#endif + readToken (token); + if (isType (token, TOKEN_COMMA)) + readToken (token); + } + if (found) + { + if (isKeyword (token, KEYWORD_alias)) { + readToken (token); +#ifndef TYPE_REFERENCE_TOOL + if (isType (token, TOKEN_STRING)) + makeEiffelFeatureTag (token); +#endif + readToken (token); + } + if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */ + parseArguments (token); + if (isType (token, TOKEN_COLON)) /* a query? 
*/ + parseEntityType (token); + if (isKeyword (token, KEYWORD_assign)) + { + readToken (token); + readToken (token); + } + if (isKeyword (token, KEYWORD_obsolete)) + { + readToken (token); + if (isType (token, TOKEN_STRING)) + readToken (token); + } + findFeatureEnd (token); + } + return found; +} + +static void parseExport (tokenInfo *const token) +{ + token->isExported = TRUE; + readToken (token); + if (isType (token, TOKEN_OPEN_BRACE)) + { + token->isExported = FALSE; + while (! isType (token, TOKEN_CLOSE_BRACE)) + { + if (isType (token, TOKEN_IDENTIFIER)) + token->isExported |= !isIdentifierMatch (token, "NONE"); + readToken (token); + } + readToken (token); + } +} + +static void parseFeatureClauses (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_feature)); + do + { + if (isKeyword (token, KEYWORD_feature)) + parseExport (token); + if (! isKeyword (token, KEYWORD_feature) && + ! isKeyword (token, KEYWORD_invariant) && + ! isKeyword (token, KEYWORD_indexing)) + { + if (! parseFeature (token)) + readToken (token); + } + } while (! isKeyword (token, KEYWORD_end) && + ! isKeyword (token, KEYWORD_invariant) && + ! 
isKeyword (token, KEYWORD_indexing));
+}
+
+/*  Parses a "rename" list: pairs of "old_name as new_name" separated by
+ *  commas. Each new name is tagged as a feature (not in the reference tool).
+ *  On return "token" is the first token that does not continue the list.
+ */
+static void parseRename (tokenInfo *const token)
+{
+	Assert (isKeyword (token, KEYWORD_rename));
+	do {
+		readToken (token);
+		if (readFeatureName (token))
+		{
+			readToken (token);
+			if (isKeyword (token, KEYWORD_as))
+			{
+				readToken (token);
+				if (readFeatureName (token))
+				{
+#ifndef TYPE_REFERENCE_TOOL
+					makeEiffelFeatureTag (token); /* renamed feature */
+#endif
+					readToken (token);
+				}
+			}
+		}
+	} while (isType (token, TOKEN_COMMA));
+}
+
+/*  Parses an "inherit" clause: a sequence of parent types, each optionally
+ *  followed by a feature adaptation part that runs up to its "end".
+ */
+static void parseInherit (tokenInfo *const token)
+{
+	Assert (isKeyword (token, KEYWORD_inherit));
+	readToken (token);
+	while (isType (token, TOKEN_IDENTIFIER))
+	{
+		parseType (token);
+		if (isType (token, TOKEN_KEYWORD))
+		{
+			switch (token->keyword) /* check for feature adaptation */
+			{
+				case KEYWORD_rename:
+					parseRename (token);
+					/* fall through: skip the rest of the adaptation part */
+				case KEYWORD_export:
+				case KEYWORD_undefine:
+				case KEYWORD_redefine:
+				case KEYWORD_select:
+					findKeyword (token, KEYWORD_end);
+					readToken (token);
+					break;
+
+				case KEYWORD_end:
+					readToken (token);
+					break;
+
+				default: break;
+			}
+		}
+		if (isType (token, TOKEN_SEMICOLON))
+			readToken (token);
+	}
+}
+
+/*  Skips a "convert" clause: a comma separated list of converters, each of
+ *  the form "name ({TYPES})" or "name: {TYPES}". Nothing is tagged.
+ *
+ *  The token following the feature name is now read before it is examined.
+ *  Previously the same token was tested for '(' and ':' right after it had
+ *  been established to be an identifier, so neither branch could ever run.
+ *  On return "token" is the first token after the last converter handled.
+ */
+static void parseConvert (tokenInfo *const token)
+{
+	Assert (isKeyword (token, KEYWORD_convert));
+	do
+	{
+		readToken (token);
+		if (! isType (token, TOKEN_IDENTIFIER))
+			break;
+		readToken (token); /* token following the feature name */
+		if (isType (token, TOKEN_OPEN_PAREN))
+		{
+			while (! isType (token, TOKEN_CLOSE_PAREN))
+				readToken (token);
+			readToken (token); /* step past ')' so a ',' can be seen */
+		}
+		else if (isType (token, TOKEN_COLON))
+		{
+			readToken (token);
+			if (! isType (token, TOKEN_OPEN_BRACE))
+				break;
+			while (! isType (token, TOKEN_CLOSE_BRACE))
+				readToken (token);
+			readToken (token); /* step past '}' so a ',' can be seen */
+		}
+	} while (isType (token, TOKEN_COMMA));
+}
+
+/*  Parses one class, starting at the "class" keyword and returning with
+ *  "token" on an "end" keyword. The class name is tagged, or, in the
+ *  reference tool, recorded in upper case and optionally printed. Formal
+ *  generics and the inherit, feature and convert clauses go to their own
+ *  parsers; every other token is skipped.
+ */
+static void parseClass (tokenInfo *const token)
+{
+	Assert (isKeyword (token, KEYWORD_class));
+	readToken (token);
+	if (isType (token, TOKEN_IDENTIFIER))
+	{
+#ifndef TYPE_REFERENCE_TOOL
+		makeEiffelClassTag (token);
+		readToken (token);
+#else
+		vStringCopy (token->className, token->string);
+		vStringUpper (token->className);
+		if (PrintClass)
+			puts (vStringValue (token->className));
+		if (! PrintReferences)
+			exit (0); /* only the class name was asked for */
+		readToken (token);
+#endif
+	}
+
+	do
+	{
+		if (isType (token, TOKEN_OPEN_BRACKET))
+			parseGeneric (token, TRUE);
+		else if (! isType (token, TOKEN_KEYWORD))
+			readToken (token);
+		else switch (token->keyword)
+		{
+			case KEYWORD_inherit: parseInherit (token); break;
+			case KEYWORD_feature: parseFeatureClauses (token); break;
+			case KEYWORD_convert: parseConvert (token); break;
+			default: readToken (token); break;
+		}
+	} while (! isKeyword (token, KEYWORD_end));
+}
+
+/*  Records the language ID assigned to this parser and registers the Eiffel
+ *  keywords under it.
+ */
+static void initialize (const langType language)
+{
+	Lang_eiffel = language;
+	buildEiffelKeywordHash ();
+}
+
+/*  Parses class after class until readToken () reaches end of file and
+ *  longjmp()s back here with ExceptionEOF.
+ *
+ *  setjmp () is used directly as an operand of an equality comparison, one
+ *  of the few contexts ISO C permits; the earlier code assigned its result
+ *  to a variable, which is not among them.
+ *  NOTE(review): tokens allocated further down the call chain (for example
+ *  in parseType ()) are not released when the jump bypasses them.
+ */
+static void findEiffelTags (void)
+{
+	tokenInfo *const token = newToken ();
+
+	if (setjmp (Exception) == ExceptionNone)
+	{
+		for (;;) /* left only through longjmp () at end of file */
+		{
+			findKeyword (token, KEYWORD_class);
+			parseClass (token);
+		}
+	}
+	deleteToken (token);
+}
+
+#ifndef TYPE_REFERENCE_TOOL
+
+/*  Builds the parser definition for Eiffel: files with extension "e", the
+ *  kinds in EiffelKinds, and the entry points defined above.
+ */
+extern parserDefinition* EiffelParser (void)
+{
+	static const char *const extensions [] = { "e", NULL };
+	parserDefinition* def = parserNew ("Eiffel");
+	def->kinds = EiffelKinds;
+	def->kindCount = KIND_COUNT (EiffelKinds);
+	def->extensions = extensions;
+	def->parser = findEiffelTags;
+	def->initialize = initialize;
+	return def;
+}
+
+#else
+
+static void findReferences (void)
+{
+	ReferencedTypes = stringListNew ();
+	GenericNames = stringListNew ();
+	initialize (0);
+
+	findEiffelTags ();
+
+	stringListDelete (GenericNames);
+	GenericNames = 
NULL; + stringListDelete (ReferencedTypes); + ReferencedTypes = NULL; +} + +/* Usage text; the single %s receives the program name. */ +static const char *const Usage = + "Prints names of types referenced by an Eiffel language file.\n" + "\n" + "Usage: %s [-cdrs] [file_name | -]\n" + "\n" + "Options:\n" + " -c Print class name of current file (on first line of output).\n" + " -d Enable debug output.\n" + " -r Print types referenced by current file (default unless -c).\n" + " -s Include self-references.\n" + "\n"; + +/* Entry point of the stand-alone type reference tool. An argument that starts + * with '-' is either "-" alone (read standard input) or a cluster of option + * letters; any other argument names the single input file. The usage text is + * printed and the process exits with status 1 on an unknown option letter, on + * a second input file, or when no input was given. References are printed by + * default unless -c was given. Returns 0 after the input has been processed. + */ +extern int main (int argc, char** argv) +{ + int i; + for (i = 1 ; argv [i] != NULL ; ++i) + { + const char *const arg = argv [i]; + if (arg [0] == '-') + { + int j; + if (arg [1] == '\0') + { + File = stdin; + FileName = "stdin"; + } + else for (j = 1 ; arg [j] != '\0' ; ++j) switch (arg [j]) + { + case 'c': PrintClass = 1; break; + case 'r': PrintReferences = 1; break; + case 's': SelfReferences = 1; break; + case 'd': Debug = 1; break; + default: + /* report the letter being examined, not always the first one */ + fprintf (errout, "%s: unknown option: %c\n", argv [0], arg [j]); + fprintf (errout, Usage, argv [0]); + exit (1); + break; + } + } + else if (File != NULL) + { + fprintf (errout, Usage, argv [0]); + exit (1); + } + else + { + FileName = arg; + File = fopen (FileName, "r"); + if (File == NULL) + { + perror (argv [0]); + exit (1); + } + } + } + if (! PrintClass) + PrintReferences = 1; + if (File == NULL) + { + fprintf (errout, Usage, argv [0]); + exit (1); + } + else + { + findReferences (); + fclose (File); + } + return 0; +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/entry.c b/entry.c new file mode 100644 index 0000000..3890e50 --- /dev/null +++ b/entry.c @@ -0,0 +1,847 @@ +/* +* $Id: entry.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for creating tag entries.
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <string.h> +#include <ctype.h> /* to define isspace () */ +#include <errno.h> + +#if defined (HAVE_SYS_TYPES_H) +# include <sys/types.h> /* to declare off_t on some hosts */ +#endif +#if defined (HAVE_TYPES_H) +# include <types.h> /* to declare off_t on some hosts */ +#endif +#if defined (HAVE_UNISTD_H) +# include <unistd.h> /* to declare close (), ftruncate (), truncate () */ +#endif + +/* These header files provide for the functions necessary to do file + * truncation. + */ +#ifdef HAVE_FCNTL_H +# include <fcntl.h> +#endif +#ifdef HAVE_IO_H +# include <io.h> +#endif + +#include "debug.h" +#include "ctags.h" +#include "entry.h" +#include "main.h" +#include "options.h" +#include "read.h" +#include "routines.h" +#include "sort.h" +#include "strlist.h" + +/* +* MACROS +*/ +/* Prefix written in front of the name of every pseudo-tag. */ +#define PSEUDO_TAG_PREFIX "!_" + +/* True when the selected tag file format is greater than 1; extension fields + * are written only in that case. + */ +#define includeExtensionFlags() (Option.tagFileFormat > 1) + +/* + * Portability defines + */ +/* Use the copy-based replacement when the build provides none of truncate (), + * ftruncate () or chsize (). + */ +#if !defined(HAVE_TRUNCATE) && !defined(HAVE_FTRUNCATE) && !defined(HAVE_CHSIZE) +# define USE_REPLACEMENT_TRUNCATE +#endif + +/* Hack for ridiculous practice of Microsoft Visual C++.
+ */ +#if defined (WIN32) && defined (_MSC_VER) +# define chsize _chsize +# define open _open +# define close _close +# define O_RDWR _O_RDWR +#endif + +/* +* DATA DEFINITIONS +*/ +/* State of the tag file being written; everything starts out empty. */ +tagFile TagFile = { + NULL, /* tag file name */ + NULL, /* tag file directory (absolute) */ + NULL, /* file pointer */ + { 0, 0 }, /* numTags */ + { 0, 0, 0 }, /* max */ + { NULL, NULL, 0 }, /* etags */ + NULL /* vLine */ +}; +/* Set by openTagFile () from isDestinationStdout (). */ +static boolean TagsToStdout = FALSE; + +/* +* FUNCTION PROTOTYPES +*/ +#ifdef NEED_PROTO_TRUNCATE +extern int truncate (const char *path, off_t length); +#endif + +#ifdef NEED_PROTO_FTRUNCATE +extern int ftruncate (int fd, off_t length); +#endif + +/* +* FUNCTION DEFINITIONS +*/ +/* Releases the directory string (when set) and the line buffer held in + * TagFile. + */ +extern void freeTagFileResources (void) +{ + if (TagFile.directory != NULL) + eFree (TagFile.directory); + vStringDelete (TagFile.vLine); +} +/* Returns the name stored in TagFile.name. */ +extern const char *tagFileName (void) +{ + return TagFile.name; +} + +/* +* Pseudo tag support +*/ +/* Raises the recorded maximum tag-name and line lengths when the given values + * exceed them. + */ +static void rememberMaxLengths (const size_t nameLength, const size_t lineLength) +{ + if (nameLength > TagFile.max.tag) + TagFile.max.tag = nameLength; + + if (lineLength > TagFile.max.line) + TagFile.max.line = lineLength; +} +/* Writes one pseudo-tag line to the tag file: the prefixed tag name, fileName + * in the file field and pattern between slashes, separated by tabs. Counts + * the line as an added tag and records its lengths. + * NOTE(review): the fprintf result is cast to size_t unchecked, so a negative + * (error) return would be recorded as a very large line length. + */ +static void writePseudoTag ( + const char *const tagName, + const char *const fileName, + const char *const pattern) +{ + const int length = fprintf ( + TagFile.fp, "%s%s\t%s\t/%s/\n", + PSEUDO_TAG_PREFIX, tagName, fileName, pattern); + ++TagFile.numTags.added; + rememberMaxLengths (strlen (tagName), (size_t) length); +} +/* Unless cross-reference output is selected, writes the pseudo-tags that + * describe the file format, the sort order and the generating program. + */ +static void addPseudoTags (void) +{ + if (!
Option.xref) + { + char format [11]; + const char *formatComment = "unknown format"; + + sprintf (format, "%u", Option.tagFileFormat); + + if (Option.tagFileFormat == 1) + formatComment = "original ctags format"; + else if (Option.tagFileFormat == 2) + formatComment = + "extended format; --format=1 will not append ;\" to lines"; + + writePseudoTag ("TAG_FILE_FORMAT", format, formatComment); + writePseudoTag ("TAG_FILE_SORTED", + Option.sorted == SO_FOLDSORTED ? "2" : + (Option.sorted == SO_SORTED ? "1" : "0"), + "0=unsorted, 1=sorted, 2=foldcase"); + writePseudoTag ("TAG_PROGRAM_AUTHOR", AUTHOR_NAME, AUTHOR_EMAIL); + writePseudoTag ("TAG_PROGRAM_NAME", PROGRAM_NAME, ""); + writePseudoTag ("TAG_PROGRAM_URL", PROGRAM_URL, "official site"); + writePseudoTag ("TAG_PROGRAM_VERSION", PROGRAM_VERSION, ""); + } +} + +static void updateSortedFlag ( + const char *const line, FILE *const fp, fpos_t startOfLine) +{ + const char *const tab = strchr (line, '\t'); + + if (tab != NULL) + { + const long boolOffset = tab - line + 1; /* where it should be */ + + if (line [boolOffset] == '0' || line [boolOffset] == '1') + { + fpos_t nextLine; + + if (fgetpos (fp, &nextLine) == -1 || fsetpos (fp, &startOfLine) == -1) + error (WARNING, "Failed to update 'sorted' pseudo-tag"); + else + { + fpos_t flagLocation; + int c, d; + + do + c = fgetc (fp); + while (c != '\t' && c != '\n'); + fgetpos (fp, &flagLocation); + d = fgetc (fp); + if (c == '\t' && (d == '0' || d == '1') && + d != (int) Option.sorted) + { + fsetpos (fp, &flagLocation); + fputc (Option.sorted == SO_FOLDSORTED ? '2' : + (Option.sorted == SO_SORTED ? '1' : '0'), fp); + } + fsetpos (fp, &nextLine); + } + } + } +} + +/* Look through all line beginning with "!_TAG_FILE", and update those which + * require it. 
+ */ +static long unsigned int updatePseudoTags (FILE *const fp) +{ + enum { maxEntryLength = 20 }; + char entry [maxEntryLength + 1]; + unsigned long linesRead = 0; + fpos_t startOfLine; + size_t entryLength; + const char *line; + + sprintf (entry, "%sTAG_FILE", PSEUDO_TAG_PREFIX); + entryLength = strlen (entry); + Assert (entryLength < maxEntryLength); + + fgetpos (fp, &startOfLine); + line = readLine (TagFile.vLine, fp); + while (line != NULL && line [0] == entry [0]) + { + ++linesRead; + if (strncmp (line, entry, entryLength) == 0) + { + char tab, classType [16]; + + if (sscanf (line + entryLength, "%15s%c", classType, &tab) == 2 && + tab == '\t') + { + if (strcmp (classType, "_SORTED") == 0) + updateSortedFlag (line, fp, startOfLine); + } + fgetpos (fp, &startOfLine); + } + line = readLine (TagFile.vLine, fp); + } + while (line != NULL) /* skip to end of file */ + { + ++linesRead; + line = readLine (TagFile.vLine, fp); + } + return linesRead; +} + +/* + * Tag file management + */ + +static boolean isValidTagAddress (const char *const excmd) +{ + boolean isValid = FALSE; + + if (strchr ("/?", excmd [0]) != NULL) + isValid = TRUE; + else + { + char *address = xMalloc (strlen (excmd) + 1, char); + if (sscanf (excmd, "%[^;\n]", address) == 1 && + strspn (address,"0123456789") == strlen (address)) + isValid = TRUE; + eFree (address); + } + return isValid; +} + +static boolean isCtagsLine (const char *const line) +{ + enum fieldList { TAG, TAB1, SRC_FILE, TAB2, EXCMD, NUM_FIELDS }; + boolean ok = FALSE; /* we assume not unless confirmed */ + const size_t fieldLength = strlen (line) + 1; + char *const fields = xMalloc (NUM_FIELDS * fieldLength, char); + + if (fields == NULL) + error (FATAL, "Cannot analyze tag file"); + else + { +#define field(x) (fields + ((size_t) (x) * fieldLength)) + + const int numFields = sscanf ( + line, "%[^\t]%[\t]%[^\t]%[\t]%[^\r\n]", + field (TAG), field (TAB1), field (SRC_FILE), + field (TAB2), field (EXCMD)); + + /* There must be exactly 
five fields: two tab fields containing + * exactly one tab each, the tag must not begin with "#", and the + * file name should not end with ";", and the excmd must be + * accceptable. + * + * These conditions will reject tag-looking lines like: + * int a; + * #define LABEL + */ + if (numFields == NUM_FIELDS && + strlen (field (TAB1)) == 1 && + strlen (field (TAB2)) == 1 && + field (TAG) [0] != '#' && + field (SRC_FILE) [strlen (field (SRC_FILE)) - 1] != ';' && + isValidTagAddress (field (EXCMD))) + ok = TRUE; + + eFree (fields); + } + return ok; +} + +static boolean isEtagsLine (const char *const line) +{ + boolean result = FALSE; + if (line [0] == '\f') + result = (boolean) (line [1] == '\n' || line [1] == '\r'); + return result; +} + +static boolean isTagFile (const char *const filename) +{ + boolean ok = FALSE; /* we assume not unless confirmed */ + FILE *const fp = fopen (filename, "rb"); + + if (fp == NULL && errno == ENOENT) + ok = TRUE; + else if (fp != NULL) + { + const char *line = readLine (TagFile.vLine, fp); + + if (line == NULL) + ok = TRUE; + else + ok = (boolean) (isCtagsLine (line) || isEtagsLine (line)); + fclose (fp); + } + return ok; +} + +extern void copyBytes (FILE* const fromFp, FILE* const toFp, const long size) +{ + enum { BufferSize = 1000 }; + long toRead, numRead; + char* buffer = xMalloc (BufferSize, char); + long remaining = size; + do + { + toRead = (0 < remaining && remaining < BufferSize) ? 
+ remaining : (long) BufferSize; + numRead = fread (buffer, (size_t) 1, (size_t) toRead, fromFp); + if (fwrite (buffer, (size_t)1, (size_t)numRead, toFp) < (size_t)numRead) + error (FATAL | PERROR, "cannot complete write"); + if (remaining > 0) + remaining -= numRead; + } while (numRead == toRead && remaining != 0); + eFree (buffer); +} +/* Copies the file named "from" to the file named "to" by way of copyBytes (), + * passing size through unchanged. Failure to open either file is reported as + * a fatal error. + */ +extern void copyFile (const char *const from, const char *const to, const long size) +{ + FILE* const fromFp = fopen (from, "rb"); + if (fromFp == NULL) + error (FATAL | PERROR, "cannot open file to copy"); + else + { + FILE* const toFp = fopen (to, "wb"); + if (toFp == NULL) + error (FATAL | PERROR, "cannot open copy destination"); + else + { + copyBytes (fromFp, toFp, size); + fclose (toFp); + } + fclose (fromFp); + } +} +/* Opens the tag output and records its stream, name and directory in TagFile. + * A temporary file is used when the destination is standard output. + * Otherwise the configured file is opened after checking, if it already + * exists, that it looks like a tag file. For ctags output the existing + * pseudo-tags are updated when appending, and fresh ones are written when the + * file is created. Failure to open the file is fatal. + */ +extern void openTagFile (void) +{ + setDefaultTagFileName (); + TagsToStdout = isDestinationStdout (); + + if (TagFile.vLine == NULL) + TagFile.vLine = vStringNew (); + + /* Open the tags file. + */ + if (TagsToStdout) + TagFile.fp = tempFile ("w", &TagFile.name); + else + { + boolean fileExists; +/* NOTE(review): already called at the top of this function. */ + setDefaultTagFileName (); + TagFile.name = eStrdup (Option.tagFileName); + fileExists = doesFileExist (TagFile.name); + if (fileExists && !
isTagFile (TagFile.name)) + error (FATAL, + "\"%s\" doesn't look like a tag file; I refuse to overwrite it.", + TagFile.name); + + if (Option.etags) + { + if (Option.append && fileExists) + TagFile.fp = fopen (TagFile.name, "a+b"); + else + TagFile.fp = fopen (TagFile.name, "w+b"); + } + else + { + if (Option.append && fileExists) + { + TagFile.fp = fopen (TagFile.name, "r+"); + if (TagFile.fp != NULL) + { + TagFile.numTags.prev = updatePseudoTags (TagFile.fp); + fclose (TagFile.fp); + TagFile.fp = fopen (TagFile.name, "a+"); + } + } + else + { + TagFile.fp = fopen (TagFile.name, "w"); + if (TagFile.fp != NULL) + addPseudoTags (); + } + } + if (TagFile.fp == NULL) + { + error (FATAL | PERROR, "cannot open tag file"); + exit (1); + } + } + if (TagsToStdout) + TagFile.directory = eStrdup (CurrentDirectory); + else + TagFile.directory = absoluteDirname (TagFile.name); +} + +#ifdef USE_REPLACEMENT_TRUNCATE + +/* Replacement for missing library function: copies name into a temporary file + * with copyFile (), passing size as the limit, copies that file back over + * name in full, then removes the temporary file. Always returns 0. + */ +static int replacementTruncate (const char *const name, const long size) +{ + char *tempName = NULL; + FILE *fp = tempFile ("w", &tempName); + fclose (fp); + copyFile (name, tempName, size); + copyFile (tempName, name, WHOLE_FILE); + remove (tempName); + eFree (tempName); + + return 0; +} + +#endif +/* Sorts the tag file when tags were added and sorting is enabled. With + * sorting disabled and output bound for standard output, the file is passed + * to catFile () instead. The temporary file used for standard output is + * removed afterwards. + */ +static void sortTagFile (void) +{ + if (TagFile.numTags.added > 0L) + { + if (Option.sorted != SO_UNSORTED) + { + verbose ("sorting tag file\n"); +#ifdef EXTERNAL_SORT + externalSortTags (TagsToStdout); +#else + internalSortTags (TagsToStdout); +#endif + } + else if (TagsToStdout) + catFile (tagFileName ()); + } + if (TagsToStdout) + remove (tagFileName ()); /* remove temporary file */ +} +/* Shortens the tag file to newSize bytes with whichever of truncate (), + * ftruncate (), chsize () or replacementTruncate () the build provides. A + * result of -1 is reported on errout together with errno. + */ +static void resizeTagFile (const long newSize) +{ + int result; + +#ifdef USE_REPLACEMENT_TRUNCATE + result = replacementTruncate (TagFile.name, newSize); +#else +# ifdef HAVE_TRUNCATE + result = truncate (TagFile.name, (off_t) newSize); +# else + const int fd = open (TagFile.name, O_RDWR); + + if (fd == -1) + result = -1; + else
+ { +# ifdef HAVE_FTRUNCATE + result = ftruncate (fd, (off_t) newSize); +# else +# ifdef HAVE_CHSIZE + result = chsize (fd, newSize); +# endif +# endif + close (fd); + } +# endif +#endif + if (result == -1) + fprintf (errout, "Cannot shorten tag file: errno = %d\n", errno); +} +/* Writes one include section (form feed, newline, then the item followed by + * ",include") to fp for each entry of Option.etagsInclude, when that list is + * set. + */ +static void writeEtagsIncludes (FILE *const fp) +{ + if (Option.etagsInclude) + { + unsigned int i; + for (i = 0 ; i < stringListCount (Option.etagsInclude) ; ++i) + { + vString *item = stringListItem (Option.etagsInclude, i); + fprintf (fp, "\f\n%s,include\n", vStringValue (item)); + } + } +} +/* Finishes the tag file. In etags mode the include sections are appended + * first. The stream position at that point is the desired size; the stream + * is then closed and, when resize is set and the file is longer than desired, + * the file is shortened to it. Finally the file is sorted and TagFile.name is + * freed. + */ +extern void closeTagFile (const boolean resize) +{ + long desiredSize, size; + + if (Option.etags) + writeEtagsIncludes (TagFile.fp); + desiredSize = ftell (TagFile.fp); + fseek (TagFile.fp, 0L, SEEK_END); + size = ftell (TagFile.fp); + fclose (TagFile.fp); + if (resize && desiredSize < size) + { + DebugStatement ( + debugPrintf (DEBUG_STATUS, "shrinking %s from %ld to %ld bytes\n", + TagFile.name, size, desiredSize); ) + resizeTagFile (desiredSize); + } + sortTagFile (); + eFree (TagFile.name); + TagFile.name = NULL; +} +/* Opens a temporary file for etags entries and resets the running byte + * count. + */ +extern void beginEtagsFile (void) +{ + TagFile.etags.fp = tempFile ("w+b", &TagFile.etags.name); + TagFile.etags.byteCount = 0; +} +/* Writes the section header for name (form feed, newline, name and the + * accumulated byte count) to the tag file. When a temporary etags file is + * open, its lines are then appended and the file is closed, removed and + * forgotten. + */ +extern void endEtagsFile (const char *const name) +{ + const char *line; + + fprintf (TagFile.fp, "\f\n%s,%ld\n", name, (long) TagFile.etags.byteCount); + if (TagFile.etags.fp != NULL) + { + rewind (TagFile.etags.fp); + while ((line = readLine (TagFile.vLine, TagFile.etags.fp)) != NULL) + fputs (line, TagFile.fp); + fclose (TagFile.etags.fp); + remove (TagFile.etags.name); + eFree (TagFile.etags.name); + TagFile.etags.fp = NULL; + TagFile.etags.name = NULL; + } +} + +/* + * Tag entry management + */ + +/* This function copies the given line out to a specified file. It has no + * effect on the fileGetc () function. During copying, a '\' is written before + * each '\', each search delimiter and a '$' that ends the line.
End of line + * characters (line feed or carriage return) are dropped. + * Returns the number of characters written to fp. + */ +static size_t writeSourceLine (FILE *const fp, const char *const line) +{ + size_t length = 0; + const char *p; + + /* Write everything up to, but not including, a line end character. + */ + for (p = line ; *p != '\0' ; ++p) + { + const int next = *(p + 1); + const int c = *p; + + if (c == CRETURN || c == NEWLINE) + break; + + /* If character is '\', the search delimiter ('?' for backward + * searches, else '/'), or a terminal '$', then quote it. + */ + if (c == BACKSLASH || c == (Option.backward ? '?' : '/') || + (c == '$' && (next == NEWLINE || next == CRETURN))) + { + putc (BACKSLASH, fp); + ++length; + } + putc (c, fp); + ++length; + } + return length; +} + +/* Writes "line", stripping leading and duplicate white space. + * Returns the number of characters written to fp. + */ +static size_t writeCompactSourceLine (FILE *const fp, const char *const line) +{ + boolean lineStarted = FALSE; + size_t length = 0; + const char *p; + int c; + + /* Write everything up to, but not including, the newline. Each character + * is fetched as an unsigned char value: isspace () is only defined for + * such values (and EOF), and plain char may be negative. + */ + for (p = line, c = (unsigned char) *p ; c != NEWLINE && c != '\0' ; c = (unsigned char) *++p) + { + if (lineStarted || ! isspace (c)) /* ignore leading spaces */ + { + lineStarted = TRUE; + if (isspace (c)) + { + int next; + + /* Consume repeating white space.
+ * The look-ahead is taken as an unsigned char value, as + * isspace () requires. + */ + while (next = (unsigned char) *(p+1) , isspace (next) && next != NEWLINE) + ++p; + c = ' '; /* force space character for any white space */ + } + if (c != CRETURN || *(p + 1) != NEWLINE) + { + putc (c, fp); + ++length; + } + } + } + return length; +} + +/* Writes one cross-reference line for tag: name, kind name (omitted in format + * 1), line number, source file name and the compacted source line. Returns + * the number of characters written, including the newline. + */ +static int writeXrefEntry (const tagEntryInfo *const tag) +{ + const char *const line = + readSourceLine (TagFile.vLine, tag->filePosition, NULL); + int length; + + if (Option.tagFileFormat == 1) + length = fprintf (TagFile.fp, "%-16s %4lu %-16s ", tag->name, + tag->lineNumber, tag->sourceFileName); + else + length = fprintf (TagFile.fp, "%-16s %-10s %4lu %-16s ", tag->name, + tag->kindName, tag->lineNumber, tag->sourceFileName); + + length += writeCompactSourceLine (TagFile.fp, line); + putc (NEWLINE, TagFile.fp); + ++length; + + return length; +} + +/* Truncates the text line containing the tag at the character following the + * tag, providing a character which designates the end of the tag. When + * discardNewline is set and that character is a newline, the line is cut + * directly after the tag instead. The line is left alone if it does not + * contain token. + */ +static void truncateTagLine ( + char *const line, const char *const token, const boolean discardNewline) +{ + char *p = strstr (line, token); + + if (p != NULL) + { + p += strlen (token); + if (*p != '\0' && ! (*p == '\n' && discardNewline)) + ++p; /* skip past character terminating character */ + *p = '\0'; + } +} + +/* Writes the etags entry for tag to the temporary etags file and adds its + * length to the running byte count. A file entry carries only the name and + * line number; any other entry is preceded by the source line, either + * truncated after the tag name or with its final character removed. Returns + * the number of characters written. + */ +static int writeEtagsEntry (const tagEntryInfo *const tag) +{ + int length; + + if (tag->isFileEntry) + length = fprintf (TagFile.etags.fp, "\177%s\001%lu,0\n", + tag->name, tag->lineNumber); + else + { + long seekValue; + char *const line = + readSourceLine (TagFile.vLine, tag->filePosition, &seekValue); + + if (tag->truncateLine) + truncateTagLine (line, tag->name, TRUE); + else if (line [0] != '\0') /* an empty line has no last character */ + line [strlen (line) - 1] = '\0'; + + length = fprintf (TagFile.etags.fp, "%s\177%s\001%lu,%ld\n", line, + tag->name, tag->lineNumber, seekValue); + } + TagFile.etags.byteCount += length; + + return length; +} + +/* Appends the enabled extension fields of tag to the current tag line. The + * first field written is preceded by the ;" separator; each field starts with + * a tab. Returns the number of characters written. + */ +static int addExtensionFields (const tagEntryInfo *const tag) +{ + const char* const kindKey = Option.extensionFields.kindKey ? 
"kind:" : ""; + boolean first = TRUE; + const char* separator = ";\""; + const char* const empty = ""; + int length = 0; +/* "sep" yields the separator the first time it is evaluated and an empty + * string on every later evaluation + */ +#define sep (first ? (first = FALSE, separator) : empty) + + if (tag->kindName != NULL && (Option.extensionFields.kindLong || + (Option.extensionFields.kind && tag->kind == '\0'))) + length += fprintf (TagFile.fp,"%s\t%s%s", sep, kindKey, tag->kindName); + else if (tag->kind != '\0' && (Option.extensionFields.kind || + (Option.extensionFields.kindLong && tag->kindName == NULL))) + length += fprintf (TagFile.fp, "%s\t%s%c", sep, kindKey, tag->kind); + + /* lineNumber is an unsigned long, so it is printed with %lu */ + if (Option.extensionFields.lineNumber) + length += fprintf (TagFile.fp, "%s\tline:%lu", sep, tag->lineNumber); + + if (Option.extensionFields.language && tag->language != NULL) + length += fprintf (TagFile.fp, "%s\tlanguage:%s", sep, tag->language); + + if (Option.extensionFields.scope && + tag->extensionFields.scope [0] != NULL && + tag->extensionFields.scope [1] != NULL) + length += fprintf (TagFile.fp, "%s\t%s:%s", sep, + tag->extensionFields.scope [0], + tag->extensionFields.scope [1]); + + if (Option.extensionFields.typeRef && + tag->extensionFields.typeRef [0] != NULL && + tag->extensionFields.typeRef [1] != NULL) + length += fprintf (TagFile.fp, "%s\ttyperef:%s:%s", sep, + tag->extensionFields.typeRef [0], + tag->extensionFields.typeRef [1]); + + if (Option.extensionFields.fileScope && tag->isFileScope) + length += fprintf (TagFile.fp, "%s\tfile:", sep); + + if (Option.extensionFields.inheritance && + tag->extensionFields.inheritance != NULL) + length += fprintf (TagFile.fp, "%s\tinherits:%s", sep, + tag->extensionFields.inheritance); + + if (Option.extensionFields.access && tag->extensionFields.access != NULL) + length += fprintf (TagFile.fp, "%s\taccess:%s", sep, + tag->extensionFields.access); + + if (Option.extensionFields.implementation && + tag->extensionFields.implementation != NULL) + length += fprintf (TagFile.fp, 
"%s\timplementation:%s", sep, + tag->extensionFields.implementation); + + if (Option.extensionFields.signature && + tag->extensionFields.signature != NULL) + length += fprintf (TagFile.fp, "%s\tsignature:%s", sep, + tag->extensionFields.signature); + + return length; +#undef sep +} + +/* Writes the search-pattern address of tag: the delimiter ('?' for backward + * searches, else '/'), '^', the quoted source line, '$' when the line ended + * with a newline, and the delimiter again. Returns the number of characters + * written. + */ +static int writePatternEntry (const tagEntryInfo *const tag) +{ + char *const line = readSourceLine (TagFile.vLine, tag->filePosition, NULL); + const int searchChar = Option.backward ? '?' : '/'; + boolean newlineTerminated; + int length = 0; + + if (tag->truncateLine) + truncateTagLine (line, tag->name, FALSE); + /* an empty line has no last character to inspect */ + newlineTerminated = (boolean) (line [0] != '\0' && + line [strlen (line) - 1] == '\n'); + + length += fprintf (TagFile.fp, "%c^", searchChar); + length += writeSourceLine (TagFile.fp, line); + length += fprintf (TagFile.fp, "%s%c", newlineTerminated ? "$":"", searchChar); + + return length; +} + +/* Writes the line number of tag as its address; returns the fprintf result. */ +static int writeLineNumberEntry (const tagEntryInfo *const tag) +{ + return fprintf (TagFile.fp, "%lu", tag->lineNumber); +} + +/* Writes one complete ctags line for tag: name, source file, address (line + * number or pattern), the extension fields when the format includes them, and + * a newline. Returns the number of characters written. + */ +static int writeCtagsEntry (const tagEntryInfo *const tag) +{ + int length = fprintf (TagFile.fp, "%s\t%s\t", + tag->name, tag->sourceFileName); + + if (tag->lineNumberEntry) + length += writeLineNumberEntry (tag); + else + length += writePatternEntry (tag); + + if (includeExtensionFlags ()) + length += addExtensionFields (tag); + + length += fprintf (TagFile.fp, "\n"); + + return length; +} + +/* Writes tag in the selected output style (xref, etags or ctags), counts it + * and records its lengths. A tag with an empty name is skipped with a + * warning; in xref mode file entries are not written. + */ +extern void makeTagEntry (const tagEntryInfo *const tag) +{ + Assert (tag->name != NULL); + if (tag->name [0] == '\0') + error (WARNING, "ignoring null tag in %s", vStringValue (File.name)); + else + { + int length = 0; + + DebugStatement ( debugEntry (tag); ) + if (Option.xref) + { + if (!
tag->isFileEntry) + length = writeXrefEntry (tag); + } + else if (Option.etags) + length = writeEtagsEntry (tag); + else + length = writeCtagsEntry (tag); + + ++TagFile.numTags.added; + rememberMaxLengths (strlen (tag->name), (size_t) length); + DebugStatement ( fflush (TagFile.fp); ) + } +} + +/* Clears *e and fills in the fields taken from the current input state: the + * line number, language, file position and tag path of the source file, and + * whether entries are located by line number. "name" is stored by pointer, + * not copied. + */ +extern void initTagEntry (tagEntryInfo *const e, const char *const name) +{ + Assert (File.source.name != NULL); + memset (e, 0, sizeof (tagEntryInfo)); + e->lineNumberEntry = (boolean) (Option.locate == EX_LINENUM); + e->lineNumber = getSourceLineNumber (); + e->language = getSourceLanguageName (); + e->filePosition = getInputFilePosition (); + e->sourceFileName = getSourceFileTagPath (); + e->name = name; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/entry.h b/entry.h new file mode 100644 index 0000000..2365c50 --- /dev/null +++ b/entry.h @@ -0,0 +1,103 @@ +/* +* $Id: entry.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* External interface to entry.c +*/ +#ifndef _ENTRY_H +#define _ENTRY_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <stdio.h> /* to define FILE and fpos_t */ + +#include "vstring.h" + +/* +* MACROS +*/ +/* Size argument for copyBytes () and copyFile () meaning "no limit"; + * parenthesized so the sign cannot bind to a neighbouring operator. + */ +#define WHOLE_FILE (-1L) + +/* +* DATA DECLARATIONS +*/ + +/* Maintains the state of the tag file. + */ +typedef struct eTagFile { + char *name; + char *directory; + FILE *fp; + struct sNumTags { unsigned long added, prev; } numTags; + struct sMax { size_t line, tag, file; } max; + struct sEtags { + char *name; + FILE *fp; + size_t byteCount; + } etags; + vString *vLine; +} tagFile; + +typedef struct sTagFields { + unsigned int count; /* number of additional extension flags */ + const char *const *label; /* list of labels for extension flags */ + const char *const *value; /* list of values for extension flags */ +} tagFields; + +/* Information about the current tag candidate.
+ */ +typedef struct sTagEntryInfo { + boolean lineNumberEntry; /* pattern or line number entry */ + unsigned long lineNumber; /* line number of tag */ + fpos_t filePosition; /* file position of line containing tag */ + const char* language; /* language of source file */ + boolean isFileScope; /* is tag visible only within source file? */ + boolean isFileEntry; /* is this just an entry for a file name? */ + boolean truncateLine; /* truncate tag line at end of tag name? */ + const char *sourceFileName; /* name of source file */ + const char *name; /* name of the tag */ + const char *kindName; /* kind of tag */ + char kind; /* single character representation of kind */ + struct { + const char* access; + const char* fileScope; + const char* implementation; + const char* inheritance; + const char* scope [2]; /* key (e.g., "module") and value, written as key:value */ + const char* signature; + + /* type (union/struct/etc.) and name for a variable or typedef. */ + const char* typeRef [2]; /* e.g., "struct" and struct name */ + + } extensionFields; /* list of extension fields*/ +} tagEntryInfo; + +/* +* GLOBAL VARIABLES +*/ +extern tagFile TagFile; + +/* +* FUNCTION PROTOTYPES +*/ +extern void freeTagFileResources (void); +extern const char *tagFileName (void); +extern void copyBytes (FILE* const fromFp, FILE* const toFp, const long size); +extern void copyFile (const char *const from, const char *const to, const long size); +extern void openTagFile (void); +extern void closeTagFile (const boolean resize); +extern void beginEtagsFile (void); +extern void endEtagsFile (const char *const name); +extern void makeTagEntry (const tagEntryInfo *const tag); +extern void initTagEntry (tagEntryInfo *const e, const char *const name); + +#endif /* _ENTRY_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/erlang.c b/erlang.c new file mode 100644 index 0000000..23469aa --- /dev/null +++ b/erlang.c @@ -0,0 +1,189 @@ +/* +* $Id: erlang.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2003, Brent Fulgham +* +*
This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Erlang language +* files. Some of the parsing constructs are based on the Emacs 'etags' +* program by Francesco Potori +*/ +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <string.h> /* to declare strcmp () */ + +#include "entry.h" +#include "options.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* +* DATA DEFINITIONS +*/ +/* Indexes into ErlangKinds, in the same order as its rows. */ +typedef enum { + K_MACRO, K_FUNCTION, K_MODULE, K_RECORD +} erlangKind; + +static kindOption ErlangKinds[] = { + {TRUE, 'd', "macro", "macro definitions"}, + {TRUE, 'f', "function", "functions"}, + {TRUE, 'm', "module", "modules"}, + {TRUE, 'r', "record", "record definitions"}, +}; + +/* +* FUNCTION DEFINITIONS +*/ + +/* An identifier may start with any alphabetic character. */ +static boolean isIdentifierFirstCharacter (int c) +{ + return (boolean) (isalpha (c)); +} + +/* Alphanumerics, '_' and ':' may appear inside an identifier. */ +static boolean isIdentifierCharacter (int c) +{ + return (boolean) (isalnum (c) || c == '_' || c == ':'); +} + +/* Returns the first position at or after cp that is not white space. */ +static const unsigned char *skipSpace (const unsigned char *cp) +{ + while (isspace ((int) *cp)) + ++cp; + return cp; +} + +/* Replaces the contents of identifier with the run of identifier characters + * that starts at cp (possibly none) and returns the position just after that + * run. + */ +static const unsigned char *parseIdentifier ( + const unsigned char *cp, vString *const identifier) +{ + vStringClear (identifier); + while (isIdentifierCharacter ((int) *cp)) + { + vStringPut (identifier, (int) *cp); + ++cp; + } + vStringTerminate (identifier); + return cp; +} + +/* Makes a tag of the given kind for identifier, provided the kind is enabled + * and the identifier is not empty. When a non-empty module name is given, + * the tag's scope is set to that module. + */ +static void makeMemberTag ( + vString *const identifier, erlangKind kind, vString *const module) +{ + if (ErlangKinds [kind].enabled && vStringLength (identifier) > 0) + { + tagEntryInfo tag; + initTagEntry (&tag, vStringValue (identifier)); + tag.kindName = ErlangKinds[kind].name; + tag.kind = ErlangKinds[kind].letter; + + if (module != NULL && vStringLength (module) > 0) + { + tag.extensionFields.scope
[0] = "module"; + tag.extensionFields.scope [1] = vStringValue (module); + } + makeTagEntry (&tag); + } +} + +/* Tags the identifier at cp as a module and makes it the current module name + * for the entries that follow. + */ +static void parseModuleTag (const unsigned char *cp, vString *const module) +{ + vString *const identifier = vStringNew (); + parseIdentifier (cp, identifier); + makeSimpleTag (identifier, ErlangKinds, K_MODULE); + + /* All further entries go in the new module */ + vStringCopy (module, identifier); + vStringDelete (identifier); +} + +/* Tags the identifier at cp with the given kind. */ +static void parseSimpleTag (const unsigned char *cp, erlangKind kind) +{ + vString *const identifier = vStringNew (); + parseIdentifier (cp, identifier); + makeSimpleTag (identifier, ErlangKinds, kind); + vStringDelete (identifier); +} + +/* Tags the identifier at cp as a function scoped to the current module. */ +static void parseFunctionTag (const unsigned char *cp, vString *const module) +{ + vString *const identifier = vStringNew (); + parseIdentifier (cp, identifier); + makeMemberTag (identifier, K_FUNCTION, module); + vStringDelete (identifier); +} + +/* + * Directives are of the form: + * -module(foo) + * -define(foo, bar) + * -record(graph, {vtab = notable, cyclic = true}). + * + * cp points just past the leading '-'. The "record", "define" and "module" + * directives produce tags; any other directive is ignored. + */ +static void parseDirective (const unsigned char *cp, vString *const module) +{ + /* + * A directive will be either a record definition or a directive. + * Record definitions are handled separately + */ + vString *const directive = vStringNew (); + const char *drtv; + cp = parseIdentifier (cp, directive); + /* Read the text pointer only after parsing: appending characters may grow + * (and so move) the vString's storage, which would leave a pointer taken + * earlier dangling. + */ + drtv = vStringValue (directive); + cp = skipSpace (cp); + if (*cp == '(') + ++cp; + + if (strcmp (drtv, "record") == 0) + parseSimpleTag (cp, K_RECORD); + else if (strcmp (drtv, "define") == 0) + parseSimpleTag (cp, K_MACRO); + else if (strcmp (drtv, "module") == 0) + parseModuleTag (cp, module); + /* Otherwise, it was an import, export, etc.
*/ + + vStringDelete (directive); +} +/* Parser entry point: reads the input line by line, looking only at the first + * character of each line. Lines starting with '%' or '"' are skipped, a + * leading '-' introduces a directive, and a leading alphabetic character + * starts a function. The current module name is kept across lines. + */ +static void findErlangTags (void) +{ + vString *const module = vStringNew (); + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + + if (*cp == '%') /* skip initial comment */ + continue; + if (*cp == '"') /* strings sometimes start in column one */ + continue; + + if ( *cp == '-') + { + ++cp; /* Move off of the '-' */ + parseDirective(cp, module); + } + else if (isIdentifierFirstCharacter ((int) *cp)) + parseFunctionTag (cp, module); + } + vStringDelete (module); +} +/* Creates the "Erlang" parser definition: the kind table, the erl/hrl file + * extensions in lower and upper case, and the findErlangTags callback. + */ +extern parserDefinition *ErlangParser (void) +{ + static const char *const extensions[] = { "erl", "ERL", "hrl", "HRL", NULL }; + parserDefinition *def = parserNew ("Erlang"); + def->kinds = ErlangKinds; + def->kindCount = KIND_COUNT (ErlangKinds); + def->extensions = extensions; + def->parser = findErlangTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/flex.c b/flex.c new file mode 100644 index 0000000..06ca243 --- /dev/null +++ b/flex.c @@ -0,0 +1,2243 @@ +/* + * $Id: flex.c 666 2008-05-15 17:47:31Z dfishburn $ + * + * Copyright (c) 2008, David Fishburn + * + * This source code is released for free distribution under the terms of the + * GNU General Public License. + * + * This module contains functions for generating tags for Adobe languages.
+ * There are a number of different ones, but this will begin with: + * Flex + * MXML files (*.mMacromedia XML) + * ActionScript files (*.as) + * + * Flex 3 language reference + * http://livedocs.adobe.com/flex/3/langref/index.html + */ + +/* + * INCLUDE FILES + */ +#include "general.h" /* must always come first */ +#include <ctype.h> /* to define isalpha () */ +#include <setjmp.h> /* to define jmp_buf */ +#ifdef DEBUG +#include <stdio.h> +#endif + +#include "debug.h" +#include "entry.h" +#include "keyword.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* + * MACROS + */ +/* Test the type or the keyword id stored in a token. */ +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* + * DATA DECLARATIONS + */ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +/* + * Tracks class and function names already created + */ +static stringList *ClassNames; +static stringList *FunctionNames; + +/* Used to specify type of keyword. +*/ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_function, + KEYWORD_capital_function, + KEYWORD_object, + KEYWORD_capital_object, + KEYWORD_prototype, + KEYWORD_var, + KEYWORD_new, + KEYWORD_this, + KEYWORD_for, + KEYWORD_while, + KEYWORD_do, + KEYWORD_if, + KEYWORD_else, + KEYWORD_switch, + KEYWORD_try, + KEYWORD_catch, + KEYWORD_finally, + KEYWORD_public, + KEYWORD_private, + KEYWORD_static, + KEYWORD_class, + KEYWORD_id, + KEYWORD_script, + KEYWORD_cdata, + KEYWORD_mx +} keywordId; + +/* Used to determine whether keyword is valid for the token language and + * what its ID is.
+ */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_CHARACTER, + TOKEN_CLOSE_PAREN, + TOKEN_SEMICOLON, + TOKEN_COLON, + TOKEN_COMMA, + TOKEN_KEYWORD, + TOKEN_OPEN_PAREN, + TOKEN_OPERATOR, + TOKEN_IDENTIFIER, + TOKEN_STRING, + TOKEN_PERIOD, + TOKEN_OPEN_CURLY, + TOKEN_CLOSE_CURLY, + TOKEN_EQUAL_SIGN, + TOKEN_EXCLAMATION, + TOKEN_FORWARD_SLASH, + TOKEN_OPEN_SQUARE, + TOKEN_CLOSE_SQUARE, + TOKEN_OPEN_MXML, + TOKEN_CLOSE_MXML, + TOKEN_CLOSE_SGML, + TOKEN_LESS_THAN, + TOKEN_GREATER_THAN, + TOKEN_QUESTION_MARK +} tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString * string; + vString * scope; + unsigned long lineNumber; + fpos_t filePosition; + int nestLevel; + boolean ignoreTag; + boolean isClass; +} tokenInfo; + +/* + * DATA DEFINITIONS + */ + +static langType Lang_js; + +static jmp_buf Exception; + +typedef enum { + FLEXTAG_FUNCTION, + FLEXTAG_CLASS, + FLEXTAG_METHOD, + FLEXTAG_PROPERTY, + FLEXTAG_VARIABLE, + FLEXTAG_MXTAG, + FLEXTAG_COUNT +} flexKind; + +static kindOption FlexKinds [] = { + { TRUE, 'f', "function", "functions" }, + { TRUE, 'c', "class", "classes" }, + { TRUE, 'm', "method", "methods" }, + { TRUE, 'p', "property", "properties" }, + { TRUE, 'v', "variable", "global variables" }, + { TRUE, 'x', "mxtag", "mxtags" } +}; + +static const keywordDesc FlexKeywordTable [] = { + /* keyword keyword ID */ + { "function", KEYWORD_function }, + { "Function", KEYWORD_capital_function }, + { "object", KEYWORD_object }, + { "Object", KEYWORD_capital_object }, + { "prototype", KEYWORD_prototype }, + { "var", KEYWORD_var }, + { "new", KEYWORD_new }, + { "this", KEYWORD_this }, + { "for", KEYWORD_for }, + { "while", KEYWORD_while }, + { "do", KEYWORD_do }, + { "if", KEYWORD_if }, + { "else", KEYWORD_else }, + { "switch", KEYWORD_switch }, + { "try", KEYWORD_try }, + { "catch", KEYWORD_catch }, + { "finally", KEYWORD_finally }, + { "public", 
KEYWORD_public }, + { "private", KEYWORD_private }, + { "static", KEYWORD_static }, + { "class", KEYWORD_class }, + { "id", KEYWORD_id }, + { "script", KEYWORD_script }, + { "cdata", KEYWORD_cdata }, + { "mx", KEYWORD_mx } +}; + +/* + * FUNCTION DEFINITIONS + */ + +/* Recursive functions */ +static void parseFunction (tokenInfo *const token); +static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent); +static boolean parseLine (tokenInfo *const token); +static boolean parseActionScript (tokenInfo *const token); + +static boolean isIdentChar (const int c) +{ + return (boolean) + (isalpha (c) || isdigit (c) || c == '$' || + c == '@' || c == '_' || c == '#'); +} + +static void buildFlexKeywordHash (void) +{ + const size_t count = sizeof (FlexKeywordTable) / + sizeof (FlexKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &FlexKeywordTable [i]; + addKeyword (p->name, Lang_js, (int) p->id); + } +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->scope = vStringNew (); + token->nestLevel = 0; + token->isClass = FALSE; + token->ignoreTag = FALSE; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + vStringDelete (token->scope); + eFree (token); +} + +/* + * Tag generation functions + */ + +static void makeConstTag (tokenInfo *const token, const flexKind kind) +{ + if (FlexKinds [kind].enabled && ! 
token->ignoreTag ) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + initTagEntry (&e, name); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.kindName = FlexKinds [kind].name; + e.kind = FlexKinds [kind].letter; + + makeTagEntry (&e); + } +} + +static void makeFlexTag (tokenInfo *const token, flexKind kind) +{ + vString * fulltag; + + if (FlexKinds [kind].enabled && ! token->ignoreTag ) + { + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n makeFlexTag start: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + if (kind == FLEXTAG_FUNCTION && token->isClass ) + { + kind = FLEXTAG_METHOD; + } + /* + * If a scope has been added to the token, change the token + * string to include the scope when making the tag. + */ + if ( vStringLength(token->scope) > 0 ) + { + fulltag = vStringNew (); + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + vStringTerminate(fulltag); + vStringCopy(token->string, fulltag); + vStringDelete (fulltag); + } + makeConstTag (token, kind); + } +} + +static void makeClassTag (tokenInfo *const token) +{ + vString * fulltag; + + if ( ! token->ignoreTag ) + { + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! stringListHas(ClassNames, vStringValue (fulltag)) ) + { + stringListAdd (ClassNames, vStringNewCopy (fulltag)); + makeFlexTag (token, FLEXTAG_CLASS); + } + vStringDelete (fulltag); + } +} + +static void makeMXTag (tokenInfo *const token) +{ + vString * fulltag; + + if ( ! 
token->ignoreTag ) + { + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + makeFlexTag (token, FLEXTAG_MXTAG); + vStringDelete (fulltag); + } +} + +static void makeFunctionTag (tokenInfo *const token) +{ + vString * fulltag; + + if ( ! token->ignoreTag ) + { + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) ) + { + stringListAdd (FunctionNames, vStringNewCopy (fulltag)); + makeFlexTag (token, FLEXTAG_FUNCTION); + } + vStringDelete (fulltag); + } +} + +/* + * Parsing functions + */ + +static void parseString (vString *const string, const int delimiter) +{ + boolean end = FALSE; + while (! end) + { + int c = fileGetc (); + if (c == EOF) + end = TRUE; + else if (c == '\\') + { + c = fileGetc(); /* This maybe a ' or ". */ + vStringPut(string, c); + } + else if (c == delimiter) + end = TRUE; + else + vStringPut (string, c); + } + vStringTerminate (string); +} + +/* Read a C identifier beginning with "firstChar" and places it into + * "name". 
+ */ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + Assert (isIdentChar (c)); + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isIdentChar (c)); + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + do + { + c = fileGetc (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + while (c == '\t' || c == ' ' || c == '\n'); + + switch (c) + { + case EOF: longjmp (Exception, (int)ExceptionEOF); break; + case '(': token->type = TOKEN_OPEN_PAREN; break; + case ')': token->type = TOKEN_CLOSE_PAREN; break; + case ';': token->type = TOKEN_SEMICOLON; break; + case ',': token->type = TOKEN_COMMA; break; + case '.': token->type = TOKEN_PERIOD; break; + case ':': token->type = TOKEN_COLON; break; + case '{': token->type = TOKEN_OPEN_CURLY; break; + case '}': token->type = TOKEN_CLOSE_CURLY; break; + case '=': token->type = TOKEN_EQUAL_SIGN; break; + case '[': token->type = TOKEN_OPEN_SQUARE; break; + case ']': token->type = TOKEN_CLOSE_SQUARE; break; + case '?': token->type = TOKEN_QUESTION_MARK; break; + + case '\'': + case '"': + token->type = TOKEN_STRING; + parseString (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '\\': + c = fileGetc (); + if (c != '\\' && c != '"' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_CHARACTER; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '/': + { + int d = fileGetc (); + if ( (d != '*') && /* is this the start of a comment? */ + (d != '/') && /* is a one line comment? */ + (d != '>') ) /* is this a close XML tag? 
*/ + { + fileUngetc (d); + token->type = TOKEN_FORWARD_SLASH; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + if (d == '*') + { + do + { + fileSkipToCharacter ('*'); + c = fileGetc (); + if (c == '/') + break; + else + fileUngetc (c); + } while (c != EOF && c != '\0'); + goto getNextChar; + } + else if (d == '/') /* is this the start of a comment? */ + { + fileSkipToCharacter ('\n'); + goto getNextChar; + } + else if (d == '>') /* is this the start of a comment? */ + { + token->type = TOKEN_CLOSE_SGML; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + } + break; + } + + case '<': + { + /* + * An XML comment looks like this + * + */ + int d = fileGetc (); + + if ( (d != '!' ) && /* is this the start of a comment? */ + (d != '/' ) && /* is this the start of a closing mx tag */ + (d != 'm' ) ) /* is this the start of a mx tag */ + { + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + } + else + { + if (d == '!') + { + int e = fileGetc (); + if ( e != '-' ) /* is this the start of a comment? */ + { + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + if (e == '-') + { + int f = fileGetc (); + if ( f != '-' ) /* is this the start of a comment? 
*/ + { + fileUngetc (f); + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + if (f == '-') + { + do + { + fileSkipToCharacter ('-'); + c = fileGetc (); + if (c == '-') + { + d = fileGetc (); + if (d == '>') + break; + else + { + fileUngetc (d); + fileUngetc (c); + } + break; + } + else + fileUngetc (c); + } while (c != EOF && c != '\0'); + goto getNextChar; + } + } + } + } + } + else if (d == 'm') + { + int e = fileGetc (); + if ( e != 'x' ) /* continuing an mx tag */ + { + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + if (e == 'x') + { + int f = fileGetc (); + if ( f != ':' ) /* is this the start of a comment? */ + { + fileUngetc (f); + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + if (f == ':') + { + token->type = TOKEN_OPEN_MXML; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + } + } + } + } + else if (d == '/') + { + int e = fileGetc (); + if ( e != 'm' ) /* continuing an mx tag */ + { + fileUngetc (e); + fileUngetc (d); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + int f = fileGetc (); + if ( f != 'x' ) /* continuing an mx tag */ + { + fileUngetc (f); + fileUngetc (e); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + if (f == 'x') + { + int g = fileGetc (); + if ( g != ':' ) /* is this the start of a comment? 
*/ + { + fileUngetc (g); + fileUngetc (f); + fileUngetc (e); + token->type = TOKEN_LESS_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + else + { + if (g == ':') + { + token->type = TOKEN_CLOSE_MXML; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + } + } + } + } + } + } + break; + } + + case '>': + token->type = TOKEN_GREATER_THAN; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '!': + token->type = TOKEN_EXCLAMATION; + /*token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition ();*/ + break; + + default: + if (! isIdentChar (c)) + token->type = TOKEN_UNDEFINED; + else + { + parseIdentifier (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + token->keyword = analyzeToken (token->string, Lang_js); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + } +} + +static void copyToken (tokenInfo *const dest, tokenInfo *const src) +{ + dest->nestLevel = src->nestLevel; + dest->lineNumber = src->lineNumber; + dest->filePosition = src->filePosition; + dest->type = src->type; + dest->keyword = src->keyword; + dest->isClass = src->isClass; + vStringCopy(dest->string, src->string); + vStringCopy(dest->scope, src->scope); +} + +/* + * Token parsing functions + */ + +static void skipArgumentList (tokenInfo *const token) +{ + int nest_level = 0; + + /* + * Other databases can have arguments with fully declared + * datatypes: + * ( name varchar(30), text binary(10) ) + * So we must check for nested open and closing parantheses + */ + + if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */ + { + nest_level++; + while (! 
(isType (token, TOKEN_CLOSE_PAREN) && (nest_level == 0))) + { + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + { + nest_level++; + } + if (isType (token, TOKEN_CLOSE_PAREN)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void skipArrayList (tokenInfo *const token) +{ + int nest_level = 0; + + /* + * Handle square brackets + * var name[1] + * So we must check for nested open and closing square brackets + */ + + if (isType (token, TOKEN_OPEN_SQUARE)) /* arguments? */ + { + nest_level++; + while (! (isType (token, TOKEN_CLOSE_SQUARE) && (nest_level == 0))) + { + readToken (token); + if (isType (token, TOKEN_OPEN_SQUARE)) + { + nest_level++; + } + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void addContext (tokenInfo* const parent, const tokenInfo* const child) +{ + if (vStringLength (parent->string) > 0) + { + vStringCatS (parent->string, "."); + } + vStringCatS (parent->string, vStringValue(child->string)); + vStringTerminate(parent->string); +} + +static void addToScope (tokenInfo* const token, vString* const extra) +{ + if (vStringLength (token->scope) > 0) + { + vStringCatS (token->scope, "."); + } + vStringCatS (token->scope, vStringValue(extra)); + vStringTerminate(token->scope); +} + +/* + * Scanning functions + */ + +static void findCmdTerm (tokenInfo *const token) +{ + /* + * Read until we find either a semicolon or closing brace. + * Any nested braces will be handled within. + */ + while (! 
( isType (token, TOKEN_SEMICOLON) || + isType (token, TOKEN_CLOSE_CURLY) ) ) + { + /* Handle nested blocks */ + if ( isType (token, TOKEN_OPEN_CURLY)) + { + parseBlock (token, token); + } + else if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + } + else + { + readToken (token); + } + } +} + +static void parseSwitch (tokenInfo *const token) +{ + /* + * switch (expression){ + * case value1: + * statement; + * break; + * case value2: + * statement; + * break; + * default : statement; + * } + */ + + readToken (token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + do + { + readToken (token); + } while (! (isType (token, TOKEN_CLOSE_SGML) || + isType (token, TOKEN_CLOSE_MXML) || + isType (token, TOKEN_CLOSE_CURLY) || + isType (token, TOKEN_GREATER_THAN)) ); + } + +} + +static void parseLoop (tokenInfo *const token) +{ + /* + * Handles these statements + * for (x=0; x<3; x++) + * document.write("This text is repeated three times
"); + * + * for (x=0; x<3; x++) + * { + * document.write("This text is repeated three times
"); + * } + * + * while (number<5){ + * document.write(number+"
"); + * number++; + * } + * + * do{ + * document.write(number+"
"); + * number++; + * } + * while (number<5); + */ + + if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + parseLine(token); + } + } + else if (isKeyword (token, KEYWORD_do)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + parseLine(token); + } + + readToken(token); + + if (isKeyword (token, KEYWORD_while)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + } + } +} + +static boolean parseIf (tokenInfo *const token) +{ + boolean read_next_token = TRUE; + /* + * If statements have two forms + * if ( ... ) + * one line; + * + * if ( ... ) + * statement; + * else + * statement + * + * if ( ... ) { + * multiple; + * statements; + * } + * + * + * if ( ... ) { + * return elem + * } + * + * This example if correctly written, but the + * else contains only 1 statement without a terminator + * since the function finishes with the closing brace. + * + * function a(flag){ + * if(flag) + * test(1); + * else + * test(2) + * } + * + * TODO: Deal with statements that can optional end + * without a semi-colon. Currently this messes up + * the parsing of blocks. 
+ * Need to somehow detect this has happened, and either + * backup a token, or skip reading the next token if + * that is possible from all code locations. + * + */ + + readToken (token); + + if (isKeyword (token, KEYWORD_if)) + { + /* + * Check for an "else if" and consume the "if" + */ + readToken (token); + } + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + findCmdTerm (token); + + /* + * The IF could be followed by an ELSE statement. + * This too could have two formats, a curly braced + * multiline section, or another single line. + */ + + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This statement did not have a line terminator. + */ + read_next_token = FALSE; + } + else + { + readToken (token); + + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This statement did not have a line terminator. 
+ */ + read_next_token = FALSE; + } + else + { + if (isKeyword (token, KEYWORD_else)) + read_next_token = parseIf (token); + } + } + } + return read_next_token; +} + +static void parseFunction (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * private static function ioErrorHandler( event:IOErrorEvent ):void { + */ + + if ( isKeyword(token, KEYWORD_function) ) + { + readToken (token); + } + + copyToken (name, token); + /* Add scope in case this is an INNER function + addToScope(name, token->scope); + */ + + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction: name isClass:%d scope:%s name:%s\n" + , name->isClass + , vStringValue(name->scope) + , vStringValue(name->string) + ); + ); + + readToken (token); + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if ( isType (token, TOKEN_COLON) ) + { + /* + * function fname ():ReturnType + */ + readToken (token); + readToken (token); + } + + if ( isType (token, TOKEN_OPEN_CURLY) ) + { + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction end: name isClass:%d scope:%s name:%s\n" + , name->isClass + , vStringValue(name->scope) + , vStringValue(name->string) + ); + ); + parseBlock (token, name); + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction end2: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction end2: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseFunction end3: name isClass:%d scope:%s name:%s\n" + , name->isClass + 
, vStringValue(name->scope) + , vStringValue(name->string) + ); + ); + makeFunctionTag (name); + } + + findCmdTerm (token); + + deleteToken (name); +} + +static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent) +{ + boolean read_next_token = TRUE; + vString * saveScope = vStringNew (); + + vStringClear(saveScope); + vStringCopy (saveScope, token->scope); + token->nestLevel++; + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseBlock start: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + /* + * Make this routine a bit more forgiving. + * If called on an open_curly advance it + */ + if ( isType (token, TOKEN_OPEN_CURLY) && + isKeyword(token, KEYWORD_NONE) ) + readToken(token); + + if (! isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * Read until we find the closing brace, + * any nested braces will be handled within + */ + do + { + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* Handle nested blocks */ + parseBlock (token, parent); + } + else + { + /* + * It is possible for a line to have no terminator + * if the following line is a closing brace. + * parseLine will detect this case and indicate + * whether we should read an additional token. + */ + read_next_token = parseLine (token); + } + + /* + * Always read a new token unless we find a statement without + * a ending terminator + */ + if( read_next_token ) + readToken(token); + + /* + * If we find a statement without a terminator consider the + * block finished, otherwise the stack will be off by one. + */ + } while (! 
isType (token, TOKEN_CLOSE_CURLY) && read_next_token ); + } + + vStringDelete(saveScope); + token->nestLevel--; + + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseBlock end: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + return FALSE; +} + +static void parseMethods (tokenInfo *const token, tokenInfo *const class) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * validProperty : 2, + * validMethod : function(a,b) {} + * 'validMethod2' : function(a,b) {} + * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false} + */ + + do + { + readToken (token); + if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE)) + { + copyToken (name, token); + + readToken (token); + if ( isType (token, TOKEN_COLON) ) + { + readToken (token); + if ( isKeyword (token, KEYWORD_function) ) + { + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + addToScope (name, class->string); + makeFlexTag (name, FLEXTAG_METHOD); + parseBlock (token, name); + + /* + * Read to the closing curly, check next + * token, if a comma, we must loop again + */ + readToken (token); + } + } + else + { + addToScope (name, class->string); + makeFlexTag (name, FLEXTAG_PROPERTY); + + /* + * Read the next token, if a comma + * we must loop again + */ + readToken (token); + } + } + } + } while ( isType(token, TOKEN_COMMA) ); + + findCmdTerm (token); + + deleteToken (name); +} + +static boolean parseVar (tokenInfo *const token, boolean is_public) +{ + tokenInfo *const name = newToken (); + tokenInfo *const secondary_name = newToken (); + vString * saveScope = vStringNew (); + boolean is_terminated = TRUE; + + vStringClear(saveScope); + vStringCopy (saveScope, token->scope); + /* + * Variables are defined as: + * private static var lastFaultMessage:Date = new Date( 0 
); + * private static var webRequests:ArrayCollection = new ArrayCollection(); + */ + + if ( isKeyword(token, KEYWORD_var) ) + { + readToken(token); + } + + /* Variable name */ + copyToken (name, token); + readToken(token); + + if ( isType (token, TOKEN_COLON) ) + { + /* + * var vname ():DataType = new Date(); + * var vname ():DataType; + */ + readToken (token); + readToken (token); + } + + while (! isType (token, TOKEN_SEMICOLON) ) + { + readToken (token); + } + + if ( isType (token, TOKEN_SEMICOLON) ) + { + /* + * Only create variables for global scope + */ + /* if ( token->nestLevel == 0 && is_global ) */ + if ( is_public ) + { + if (isType (token, TOKEN_SEMICOLON)) + makeFlexTag (name, FLEXTAG_VARIABLE); + } + } + + vStringCopy(token->scope, saveScope); + deleteToken (name); + deleteToken (secondary_name); + vStringDelete(saveScope); + + return is_terminated; +} + +static boolean parseClass (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + vString * saveScope = vStringNew (); + boolean saveIsClass = token->isClass; + + vStringClear(saveScope); + vStringCopy (saveScope, token->scope); + /* + * Variables are defined as: + * private static var lastFaultMessage:Date = new Date( 0 ); + * private static var webRequests:ArrayCollection = new ArrayCollection(); + */ + + if ( isKeyword(token, KEYWORD_class) ) + { + readToken(token); + } + + token->isClass = TRUE; + /* Add class name to scope */ + addToScope(token, token->string); + /* Class name */ + copyToken (name, token); + readToken(token); + + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseClass start: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + if ( isType (token, TOKEN_OPEN_CURLY) ) + { + makeClassTag (name); + parseBlock (token, name); + } + + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseClass end: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + 
, vStringValue(token->string) + ); + ); + vStringCopy(token->scope, saveScope); + token->isClass = saveIsClass; + deleteToken (name); + vStringDelete(saveScope); + + return TRUE; +} + +static boolean parseStatement (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + tokenInfo *const secondary_name = newToken (); + vString * saveScope = vStringNew (); + boolean is_public = FALSE; + boolean is_class = FALSE; + boolean is_terminated = TRUE; + boolean is_global = FALSE; + boolean is_prototype = FALSE; + vString * fulltag; + + vStringClear(saveScope); + vStringCopy (saveScope, token->scope); + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n parseStatement: token isClass:%d scope:%s name:%s\n" + , token->isClass + , vStringValue(token->scope) + , vStringValue(token->string) + ); + ); + /* + * Functions can be named or unnamed. + * This deals with these formats: + * Function + * validFunctionOne = function(a,b) {} + * testlib.validFunctionFive = function(a,b) {} + * var innerThree = function(a,b) {} + * var innerFour = (a,b) {} + * var D2 = secondary_fcn_name(a,b) {} + * var D3 = new Function("a", "b", "return a+b;"); + * Class + * testlib.extras.ValidClassOne = function(a,b) { + * this.a = a; + * } + * Class Methods + * testlib.extras.ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + * ValidClassTwo = function () + * { + * this.validMethodThree = function() {} + * // unnamed method + * this.validMethodFour = () {} + * } + * Database.prototype.validMethodThree = Database_getTodaysDate; + */ + + if ( isKeyword(token, KEYWORD_public) ) + { + is_public = TRUE; + readToken(token); + } + + if ( isKeyword(token, KEYWORD_private) ) + { + readToken(token); + } + + if ( isKeyword(token, KEYWORD_static) ) + { + readToken(token); + } + + if (isType(token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_for: + case KEYWORD_while: + case KEYWORD_do: + parseLoop (token); + break; + 
case KEYWORD_if: + case KEYWORD_else: + case KEYWORD_try: + case KEYWORD_catch: + case KEYWORD_finally: + /* Common semantics */ + is_terminated = parseIf (token); + break; + case KEYWORD_switch: + parseSwitch (token); + break; + case KEYWORD_class: + parseClass (token); + return is_terminated; + break; + case KEYWORD_function: + parseFunction (token); + return is_terminated; + break; + case KEYWORD_var: + parseVar (token, is_public); + return is_terminated; + break; + default: + readToken(token); + break; + } + } + + copyToken (name, token); + + while (! isType (token, TOKEN_CLOSE_CURLY) && + ! isType (token, TOKEN_SEMICOLON) && + ! isType (token, TOKEN_EQUAL_SIGN) ) + { + /* Potentially the name of the function */ + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * Cannot be a global variable is it has dot references in the name + */ + is_global = FALSE; + do + { + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + if ( is_class ) + { + vStringCopy(saveScope, token->scope); + addToScope(token, name->string); + } + else + addContext (name, token); + } + else if ( isKeyword(token, KEYWORD_prototype) ) + { + /* + * When we reach the "prototype" tag, we infer: + * "BindAgent" is a class + * "build" is a method + * + * function BindAgent( repeatableIdName, newParentIdName ) { + * } + * + * CASE 1 + * Specified function name: "build" + * BindAgent.prototype.build = function( mode ) { + * ignore everything within this function + * } + * + * CASE 2 + * Prototype listing + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + * + */ + makeClassTag (name); + is_class = TRUE; + is_prototype = TRUE; + + /* + * There should a ".function_name" next. 
+ */ + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * Handle CASE 1 + */ + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + vStringCopy(saveScope, token->scope); + addToScope(token, name->string); + + makeFlexTag (token, FLEXTAG_METHOD); + /* + * We can read until the end of the block / statement. + * We need to correctly parse any nested blocks, but + * we do NOT want to create any tags based on what is + * within the blocks. + */ + token->ignoreTag = TRUE; + /* + * Find to the end of the statement + */ + findCmdTerm (token); + token->ignoreTag = FALSE; + is_terminated = TRUE; + goto cleanUp; + } + } + else if (isType (token, TOKEN_EQUAL_SIGN)) + { + readToken (token); + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Handle CASE 2 + * + * Creates tags for each of these class methods + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + */ + parseMethods(token, name); + /* + * Find to the end of the statement + */ + findCmdTerm (token); + token->ignoreTag = FALSE; + is_terminated = TRUE; + goto cleanUp; + } + } + } + readToken (token); + } while (isType (token, TOKEN_PERIOD)); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if ( isType (token, TOKEN_COLON) ) + { + /* + * Functions are of this form: + * function fname ():ReturnType { + */ + readToken (token); + readToken (token); + } + + if ( isType (token, TOKEN_OPEN_SQUARE) ) + skipArrayList(token); + + } + + if ( isType (token, TOKEN_CLOSE_CURLY) ) + { + /* + * Reaching this section without having + * processed an open curly brace indicates + * the statement is most likely not terminated. 
+ */ + is_terminated = FALSE; + goto cleanUp; + } + + if ( isType (token, TOKEN_SEMICOLON) ) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 && is_global ) + { + /* + * Handles this syntax: + * var g_var2; + */ + if (isType (token, TOKEN_SEMICOLON)) + makeFlexTag (name, FLEXTAG_VARIABLE); + } + /* + * Statement has ended. + * This deals with calls to functions, like: + * alert(..); + */ + goto cleanUp; + } + + if ( isType (token, TOKEN_EQUAL_SIGN) ) + { + readToken (token); + + if ( isKeyword (token, KEYWORD_function) ) + { + readToken (token); + + if ( isKeyword (token, KEYWORD_NONE) && + ! isType (token, TOKEN_OPEN_PAREN) ) + { + /* + * Functions of this format: + * var D2A = function theAdd(a, b) + * { + * return a+b; + * } + * Are really two separate defined functions and + * can be referenced in two ways: + * alert( D2A(1,2) ); // produces 3 + * alert( theAdd(1,2) ); // also produces 3 + * So it must have two tags: + * D2A + * theAdd + * Save the reference to the name for later use, once + * we have established this is a valid function we will + * create the secondary reference to it. + */ + copyToken (secondary_name, token); + readToken (token); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. 
+ */ + if ( token->isClass ) + { + makeFlexTag (name, FLEXTAG_METHOD); + if ( vStringLength(secondary_name->string) > 0 ) + makeFunctionTag (secondary_name); + parseBlock (token, name); + } + else + { + parseBlock (token, name); + makeFunctionTag (name); + + if ( vStringLength(secondary_name->string) > 0 ) + makeFunctionTag (secondary_name); + + /* + * Find to the end of the statement + */ + goto cleanUp; + } + } + } + else if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions + * this.method_name = () {} + */ + skipArgumentList(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Nameless functions are only setup as methods. + */ + makeFlexTag (name, FLEXTAG_METHOD); + parseBlock (token, name); + } + } + else if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Creates tags for each of these class methods + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + */ + parseMethods(token, name); + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * Assume the closing parantheses terminates + * this statements. + */ + is_terminated = TRUE; + } + } + else if (isKeyword (token, KEYWORD_new)) + { + readToken (token); + if ( isKeyword (token, KEYWORD_function) || + isKeyword (token, KEYWORD_capital_function) || + isKeyword (token, KEYWORD_object) || + isKeyword (token, KEYWORD_capital_object) ) + { + if ( isKeyword (token, KEYWORD_object) || + isKeyword (token, KEYWORD_capital_object) ) + is_class = TRUE; + + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if (isType (token, TOKEN_SEMICOLON)) + { + if ( token->nestLevel == 0 ) + { + if ( is_class ) + { + makeClassTag (name); + } else { + makeFunctionTag (name); + } + } + } + } + } + else if (isKeyword (token, KEYWORD_NONE)) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 && is_global ) + { + /* + * A pointer can be created to the function. 
+ * If we recognize the function/class name ignore the variable. + * This format looks identical to a variable definition. + * A variable defined outside of a block is considered + * a global variable: + * var g_var1 = 1; + * var g_var2; + * This is not a global variable: + * var g_var = function; + * This is a global variable: + * var g_var = different_var_name; + */ + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) && + ! stringListHas(ClassNames, vStringValue (fulltag)) ) + { + findCmdTerm (token); + if (isType (token, TOKEN_SEMICOLON)) + makeFlexTag (name, FLEXTAG_VARIABLE); + } + vStringDelete (fulltag); + } + } + } + findCmdTerm (token); + + /* + * Statements can be optionally terminated in the case of + * statement prior to a close curly brace as in the + * document.write line below: + * + * function checkForUpdate() { + * if( 1==1 ) { + * document.write("hello from checkForUpdate
") + * } + * return 1; + * } + */ + if ( ! is_terminated && isType (token, TOKEN_CLOSE_CURLY)) + is_terminated = FALSE; + + +cleanUp: + vStringCopy(token->scope, saveScope); + deleteToken (name); + deleteToken (secondary_name); + vStringDelete(saveScope); + + return is_terminated; +} + +static boolean parseLine (tokenInfo *const token) +{ + boolean is_terminated = TRUE; + /* + * Detect the common statements, if, while, for, do, ... + * This is necessary since the last statement within a block "{}" + * can be optionally terminated. + * + * If the statement is not terminated, we need to tell + * the calling routine to prevent reading an additional token + * looking for the end of the statement. + */ + + if (isType(token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_for: + case KEYWORD_while: + case KEYWORD_do: + parseLoop (token); + break; + case KEYWORD_if: + case KEYWORD_else: + case KEYWORD_try: + case KEYWORD_catch: + case KEYWORD_finally: + /* Common semantics */ + is_terminated = parseIf (token); + break; + case KEYWORD_switch: + parseSwitch (token); + break; + default: + parseStatement (token); + break; + } + } + else + { + /* + * Special case where single line statements may not be + * SEMICOLON terminated. parseBlock needs to know this + * so that it does not read the next token. + */ + is_terminated = parseStatement (token); + } + return is_terminated; +} + +static boolean parseCDATA (tokenInfo *const token) +{ + if (isType (token, TOKEN_LESS_THAN)) + { + /* + * Handle these tags + * + */ + readToken (token); + if (isType (token, TOKEN_EXCLAMATION)) + { + /* + * Not sure why I had to comment these out, but I did. 
+ * readToken (token); + * if (isType (token, TOKEN_OPEN_SQUARE)) + * { + */ + readToken (token); + if (isKeyword (token, KEYWORD_cdata)) + { + readToken (token); + if (isType (token, TOKEN_OPEN_SQUARE)) + { + parseActionScript (token); /* parse the script text that follows the CDATA opener */ + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + readToken (token); + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + readToken (token); + } + } + } + } + /*} Not sure */ + } + } + else + { + parseActionScript (token); /* token is not TOKEN_LESS_THAN: treat the input as plain script */ + } + return TRUE; +} + /* Parse one MXML element; every path releases the two scratch tokens at cleanUp and returns TRUE. */ +static boolean parseMXML (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + tokenInfo *const type = newToken (); + /* + * A script element is either skipped, when the scan below stops on + * TOKEN_CLOSE_MXML or TOKEN_CLOSE_SGML, or has its body handed to + * parseCDATA. + * + * For any other element the first token is kept in "type"; nested + * TOKEN_OPEN_MXML tokens recurse into parseMXML and every "id" + * keyword produces a tag through makeMXTag, scoped to "type". + */ + + readToken (token); + + if (isKeyword (token, KEYWORD_script)) + { + /* + * These tags can be of this form: + * (the example was lost from this copy; presumably a script tag that + * closes itself and names an external source file - confirm upstream) + */ + do + { + readToken (token); + } while (! (isType (token, TOKEN_CLOSE_SGML) || + isType (token, TOKEN_CLOSE_MXML) || + isType (token, TOKEN_GREATER_THAN)) ); + + if (isType (token, TOKEN_CLOSE_MXML)) + { + /* + * We have found a closing tag + * Finish reading the "type" and ">" + */ + readToken (token); + readToken (token); + goto cleanUp; + } + if (isType (token, TOKEN_CLOSE_SGML)) + { + /* + * We have found TOKEN_CLOSE_SGML (the rest of this comment was lost + * from this copy); nothing more is parsed for this tag. + */ + goto cleanUp; + } + + /* + * This is a beginning of an embedded script.
+ * These typically are of this format: + * + * + * + */ + readToken (token); + parseCDATA (token); + + readToken (token); + if (isType (token, TOKEN_CLOSE_MXML)) + { + /* + * We have found a closing tag + * Finish reading the "type" and ">" + */ + readToken (token); + readToken (token); + } + goto cleanUp; + } + + copyToken (type, token); /* first token of the element; used as the scope of the id tags below */ + + readToken (token); + do + { + if (isType (token, TOKEN_OPEN_MXML)) + { + parseMXML (token); + } + else if (isKeyword (token, KEYWORD_id)) + { + /* = (presumably consumed by the first read below; the second read lands on the id value) */ + readToken (token); + readToken (token); + + copyToken (name, token); + addToScope (name, type->string); + makeMXTag (name); + } + readToken (token); + } while (! (isType (token, TOKEN_CLOSE_SGML) || isType (token, TOKEN_CLOSE_MXML)) ); + + if (isType (token, TOKEN_CLOSE_MXML)) + { + /* + * We have found a closing tag + * Finish reading the "type" and ">" + */ + readToken (token); + readToken (token); + } + +cleanUp: + deleteToken (name); + deleteToken (type); + return TRUE; +} + /* Script loop: reads tokens until two TOKEN_CLOSE_SQUARE followed by TOKEN_GREATER_THAN, or a TOKEN_CLOSE_MXML, is seen and then returns TRUE. TOKEN_OPEN_MXML goes to parseMXML, the function keyword to parseFunction and anything else to parseLine. The loop has no other exit. */ +static boolean parseActionScript (tokenInfo *const token) +{ + do + { + readToken (token); + + if (isType (token, TOKEN_LESS_THAN)) + { + /* + * Handle these tags + * (presumably the opening marker of a CDATA section; the literal + * text was lost from this copy) + * + * NOTE(review): the TOKEN_OPEN_SQUARE test below is nested inside + * the TOKEN_EQUAL_SIGN test on the same token, so it can never + * succeed. parseCDATA tests TOKEN_EXCLAMATION at this point - + * confirm the intent. + */ + readToken (token); + if (isType (token, TOKEN_EQUAL_SIGN)) + { + if (isType (token, TOKEN_OPEN_SQUARE)) + { + readToken (token); + if (isKeyword (token, KEYWORD_cdata)) + { + readToken (token); + } + } + } + } + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + /* + * Handle these tags + * (two TOKEN_CLOSE_SQUARE followed by TOKEN_GREATER_THAN, + * presumably the end of a CDATA section) + */ + readToken (token); + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + readToken (token); + if (isType (token, TOKEN_GREATER_THAN)) + { + return TRUE; + } + } + } + else if (isType (token, TOKEN_CLOSE_MXML)) + { + /* + * Read the Script> tags + */ + readToken (token); + readToken (token); + return TRUE; + } + else if (isType (token, TOKEN_OPEN_MXML)) + { + parseMXML (token); + } + else + { + if (isType(token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_function: parseFunction (token); break; + default: parseLine (token); break; + } + } +
else + { + parseLine (token); + } + } + } while (TRUE); +} + /* Top-level loop over the input. It has no exit of its own; presumably it is left through a longjmp to Exception, which findFlexTags arms with setjmp. */ +static void parseFlexFile (tokenInfo *const token) +{ + do + { + readToken (token); + + if (isType (token, TOKEN_OPEN_MXML)) + { + parseMXML (token); + } /* NOTE(review): the next test is a plain "if", not "else if", so the token left behind by parseMXML is examined again - confirm this is intended */ + if (isType (token, TOKEN_LESS_THAN)) + { + readToken (token); + if (isType (token, TOKEN_QUESTION_MARK)) + { /* skip ahead to the next TOKEN_QUESTION_MARK and one token past it */ + readToken (token); + while (! isType (token, TOKEN_QUESTION_MARK) ) + { + readToken (token); + } + readToken (token); + } + } + else + { + parseActionScript (token); + } + } while (TRUE); +} + /* Initialization hook installed by FlexParser: checks that FlexKinds has FLEXTAG_COUNT entries, stores the language id in Lang_js and builds the keyword hash. */ +static void initialize (const langType language) +{ + Assert (sizeof (FlexKinds) / sizeof (FlexKinds [0]) == FLEXTAG_COUNT); + Lang_js = language; + buildFlexKeywordHash (); +} + /* Parser entry point installed by FlexParser: allocates the working token and both name lists, runs parseFlexFile until setjmp reports a value other than ExceptionNone, then releases everything. */ +static void findFlexTags (void) +{ + tokenInfo *const token = newToken (); + exception_t exception; + + ClassNames = stringListNew (); + FunctionNames = stringListNew (); + + exception = (exception_t) (setjmp (Exception)); + while (exception == ExceptionNone) + parseFlexFile (token); + + stringListDelete (ClassNames); + stringListDelete (FunctionNames); + ClassNames = NULL; + FunctionNames = NULL; + deleteToken (token); +} + +/* Create parser definition structure: a parser named "Flex" for the "as" and "mxml" extensions */ +extern parserDefinition* FlexParser (void) +{ + static const char *const extensions [] = { "as", "mxml", NULL }; + parserDefinition *const def = parserNew ("Flex"); + def->extensions = extensions; + /* + * New definitions for parsing instead of regex + */ + def->kinds = FlexKinds; + def->kindCount = KIND_COUNT (FlexKinds); + def->parser = findFlexTags; + def->initialize = initialize; + + return def; +} +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/fortran.c b/fortran.c new file mode 100644 index 0000000..2a6f85c --- /dev/null +++ b/fortran.c @@ -0,0 +1,2197 @@ +/* +* $Id: fortran.c 660 2008-04-20 23:30:12Z elliotth $ +* +* Copyright (c) 1998-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License.
+* +* This module contains functions for generating tags for Fortran language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <string.h> /* strchr (), strncmp () */ +#include <limits.h> +#include <ctype.h> /* to define tolower () */ +#include <setjmp.h> /* jmp_buf, longjmp () */ + +#include "debug.h" +#include "entry.h" +#include "keyword.h" +#include "options.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* +* MACROS +*/ +/* These are function-like macros: an argument may be expanded more than + * once, so pass expressions without side effects. Every use of an argument + * is parenthesized so that compound expressions expand as expected. + */ +#define isident(c) (isalnum(c) || (c) == '_') +#define isBlank(c) (boolean) ((c) == ' ' || (c) == '\t') +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) +#define isSecondaryKeyword(token,k) (boolean) ((token)->secondary == NULL ? \ + FALSE : (token)->secondary->keyword == (k)) + +/* +* DATA DECLARATIONS +*/ + +/* Values passed to longjmp (Exception, ...). ExceptionNone is 0, the value + * setjmp () yields when it returns directly. + */ +typedef enum eException { + ExceptionNone, ExceptionEOF, ExceptionFixedFormat, ExceptionLoop +} exception_t; + +/* Used to designate type of line read in fixed source form. + */ +typedef enum eFortranLineType { + LTYPE_UNDETERMINED, + LTYPE_INVALID, + LTYPE_COMMENT, + LTYPE_CONTINUATION, + LTYPE_EOF, + LTYPE_INITIAL, + LTYPE_SHORT +} lineType; + +/* Used to specify type of keyword.
+ */ +typedef enum eKeywordId { + KEYWORD_NONE = -1, /* no keyword recognized; newToken () gives every new token this value */ + KEYWORD_allocatable, + KEYWORD_assignment, + KEYWORD_automatic, + KEYWORD_block, + KEYWORD_byte, + KEYWORD_cexternal, + KEYWORD_cglobal, + KEYWORD_character, + KEYWORD_common, + KEYWORD_complex, + KEYWORD_contains, + KEYWORD_data, + KEYWORD_dimension, + KEYWORD_dllexport, + KEYWORD_dllimport, + KEYWORD_do, + KEYWORD_double, + KEYWORD_elemental, + KEYWORD_end, + KEYWORD_entry, + KEYWORD_equivalence, + KEYWORD_external, + KEYWORD_format, + KEYWORD_function, + KEYWORD_if, + KEYWORD_implicit, + KEYWORD_include, + KEYWORD_inline, + KEYWORD_integer, + KEYWORD_intent, + KEYWORD_interface, + KEYWORD_intrinsic, + KEYWORD_logical, + KEYWORD_map, + KEYWORD_module, + KEYWORD_namelist, + KEYWORD_operator, + KEYWORD_optional, + KEYWORD_parameter, + KEYWORD_pascal, + KEYWORD_pexternal, + KEYWORD_pglobal, + KEYWORD_pointer, + KEYWORD_precision, + KEYWORD_private, + KEYWORD_program, + KEYWORD_public, + KEYWORD_pure, + KEYWORD_real, + KEYWORD_record, + KEYWORD_recursive, + KEYWORD_save, + KEYWORD_select, + KEYWORD_sequence, + KEYWORD_static, + KEYWORD_stdcall, + KEYWORD_structure, + KEYWORD_subroutine, + KEYWORD_target, + KEYWORD_then, + KEYWORD_type, + KEYWORD_union, + KEYWORD_use, + KEYWORD_value, + KEYWORD_virtual, + KEYWORD_volatile, + KEYWORD_where, + KEYWORD_while +} keywordId; /* every id other than KEYWORD_NONE has a matching spelling in FortranKeywordTable */ + +/* Used to determine whether keyword is valid for the token language and + * what its ID is.
+ */ +typedef struct sKeywordDesc { + const char *name; /* keyword spelling handed to addKeyword () */ + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_COMMA, + TOKEN_DOUBLE_COLON, + TOKEN_IDENTIFIER, + TOKEN_KEYWORD, + TOKEN_LABEL, + TOKEN_NUMERIC, + TOKEN_OPERATOR, + TOKEN_PAREN_CLOSE, + TOKEN_PAREN_OPEN, + TOKEN_PERCENT, + TOKEN_STATEMENT_END, + TOKEN_STRING +} tokenType; + +typedef enum eTagType { + TAG_UNDEFINED = -1, + TAG_BLOCK_DATA, + TAG_COMMON_BLOCK, + TAG_ENTRY_POINT, + TAG_FUNCTION, + TAG_INTERFACE, + TAG_COMPONENT, + TAG_LABEL, + TAG_LOCAL, + TAG_MODULE, + TAG_NAMELIST, + TAG_PROGRAM, + TAG_SUBROUTINE, + TAG_DERIVED_TYPE, + TAG_VARIABLE, + TAG_COUNT /* must be last */ +} tagType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; /* KEYWORD_NONE when no keyword was recognized */ + tagType tag; /* kind assigned by makeFortranTag (); TAG_UNDEFINED until then */ + vString* string; /* token text; released by deleteToken () */ + struct sTokenInfo *secondary; /* keyword fused onto a leading "end" (see readIdentifier), otherwise NULL */ + unsigned long lineNumber; + fpos_t filePosition; +} tokenInfo; + +/* +* DATA DEFINITIONS +*/ + +static langType Lang_fortran; +static jmp_buf Exception; /* target of the longjmp () calls made while reading input */ +static int Ungetc; /* single pushed-back character for getChar (); '\0' means empty */ +static unsigned int Column; /* position in the current fixed-form line; 0 means the next line still has to be classified */ +static boolean FreeSourceForm; /* TRUE: getChar () reads free source form; FALSE: fixed source form */ +static boolean ParsingString; /* TRUE while parseString () runs, so that '!' is not taken as a comment in fixed form */ +static tokenInfo *Parent; + +/* indexed by tagType; this file reads the fields enabled, letter and name - the last string is presumably the description (confirm against kindOption) */ +static kindOption FortranKinds [] = { + { TRUE, 'b', "block data", "block data"}, + { TRUE, 'c', "common", "common blocks"}, + { TRUE, 'e', "entry", "entry points"}, + { TRUE, 'f', "function", "functions"}, + { FALSE, 'i', "interface", "interface contents, generic names, and operators"}, + { TRUE, 'k', "component", "type and structure components"}, + { TRUE, 'l', "label", "labels"}, + { FALSE, 'L', "local", "local, common block, and namelist variables"}, + { TRUE, 'm', "module", "modules"}, + { TRUE, 'n', "namelist", "namelists"}, + { TRUE, 'p', "program", "programs"}, + { TRUE, 's', "subroutine", "subroutines"}, + { TRUE, 't', "type", "derived types and structures"}, + { TRUE, 'v', "variable", "program (global) and module variables"} +}; + +/* For definitions of Fortran 77 with extensions: + *
http://www.fortran.com/fortran/F77_std/rjcnf0001.html + * http://scienide.uwaterloo.ca/MIPSpro7/007-2362-004/sgi_html/index.html + * + * For the Compaq Fortran Reference Manual: + * http://h18009.www1.hp.com/fortran/docs/lrm/dflrm.htm + */ + /* Spelling/id pairs, one per keywordId; buildFortranKeywordHash () registers every entry with addKeyword () for Lang_fortran. */ +static const keywordDesc FortranKeywordTable [] = { + /* keyword keyword ID */ + { "allocatable", KEYWORD_allocatable }, + { "assignment", KEYWORD_assignment }, + { "automatic", KEYWORD_automatic }, + { "block", KEYWORD_block }, + { "byte", KEYWORD_byte }, + { "cexternal", KEYWORD_cexternal }, + { "cglobal", KEYWORD_cglobal }, + { "character", KEYWORD_character }, + { "common", KEYWORD_common }, + { "complex", KEYWORD_complex }, + { "contains", KEYWORD_contains }, + { "data", KEYWORD_data }, + { "dimension", KEYWORD_dimension }, + { "dll_export", KEYWORD_dllexport }, + { "dll_import", KEYWORD_dllimport }, + { "do", KEYWORD_do }, + { "double", KEYWORD_double }, + { "elemental", KEYWORD_elemental }, + { "end", KEYWORD_end }, + { "entry", KEYWORD_entry }, + { "equivalence", KEYWORD_equivalence }, + { "external", KEYWORD_external }, + { "format", KEYWORD_format }, + { "function", KEYWORD_function }, + { "if", KEYWORD_if }, + { "implicit", KEYWORD_implicit }, + { "include", KEYWORD_include }, + { "inline", KEYWORD_inline }, + { "integer", KEYWORD_integer }, + { "intent", KEYWORD_intent }, + { "interface", KEYWORD_interface }, + { "intrinsic", KEYWORD_intrinsic }, + { "logical", KEYWORD_logical }, + { "map", KEYWORD_map }, + { "module", KEYWORD_module }, + { "namelist", KEYWORD_namelist }, + { "operator", KEYWORD_operator }, + { "optional", KEYWORD_optional }, + { "parameter", KEYWORD_parameter }, + { "pascal", KEYWORD_pascal }, + { "pexternal", KEYWORD_pexternal }, + { "pglobal", KEYWORD_pglobal }, + { "pointer", KEYWORD_pointer }, + { "precision", KEYWORD_precision }, + { "private", KEYWORD_private }, + { "program", KEYWORD_program }, + { "public", KEYWORD_public }, + { "pure", KEYWORD_pure }, + { "real", KEYWORD_real }, + {
"record", KEYWORD_record }, + { "recursive", KEYWORD_recursive }, + { "save", KEYWORD_save }, + { "select", KEYWORD_select }, + { "sequence", KEYWORD_sequence }, + { "static", KEYWORD_static }, + { "stdcall", KEYWORD_stdcall }, + { "structure", KEYWORD_structure }, + { "subroutine", KEYWORD_subroutine }, + { "target", KEYWORD_target }, + { "then", KEYWORD_then }, + { "type", KEYWORD_type }, + { "union", KEYWORD_union }, + { "use", KEYWORD_use }, + { "value", KEYWORD_value }, + { "virtual", KEYWORD_virtual }, + { "volatile", KEYWORD_volatile }, + { "where", KEYWORD_where }, + { "while", KEYWORD_while } +}; + +static struct { + unsigned int count; + unsigned int max; + tokenInfo* list; +} Ancestors = { 0, 0, NULL }; + +/* +* FUNCTION PROTOTYPES +*/ +static void parseStructureStmt (tokenInfo *const token); +static void parseUnionStmt (tokenInfo *const token); +static void parseDerivedTypeDef (tokenInfo *const token); +static void parseFunctionSubprogram (tokenInfo *const token); +static void parseSubroutineSubprogram (tokenInfo *const token); + +/* +* FUNCTION DEFINITIONS +*/ + +static void ancestorPush (tokenInfo *const token) +{ + enum { incrementalIncrease = 10 }; + if (Ancestors.list == NULL) + { + Assert (Ancestors.max == 0); + Ancestors.count = 0; + Ancestors.max = incrementalIncrease; + Ancestors.list = xMalloc (Ancestors.max, tokenInfo); + } + else if (Ancestors.count == Ancestors.max) + { + Ancestors.max += incrementalIncrease; + Ancestors.list = xRealloc (Ancestors.list, Ancestors.max, tokenInfo); + } + Ancestors.list [Ancestors.count] = *token; + Ancestors.list [Ancestors.count].string = vStringNewCopy (token->string); + Ancestors.count++; +} + +static void ancestorPop (void) +{ + Assert (Ancestors.count > 0); + --Ancestors.count; + vStringDelete (Ancestors.list [Ancestors.count].string); + + Ancestors.list [Ancestors.count].type = TOKEN_UNDEFINED; + Ancestors.list [Ancestors.count].keyword = KEYWORD_NONE; + Ancestors.list [Ancestors.count].secondary = 
NULL; + Ancestors.list [Ancestors.count].tag = TAG_UNDEFINED; + Ancestors.list [Ancestors.count].string = NULL; + Ancestors.list [Ancestors.count].lineNumber = 0L; +} + +/* Return the innermost ancestor that can serve as a tag scope: an identifier + * token whose tag is neither TAG_UNDEFINED nor TAG_INTERFACE. Returns NULL + * when there is none. + */ +static const tokenInfo* ancestorScope (void) +{ + tokenInfo *result = NULL; + unsigned int i; + for (i = Ancestors.count ; i > 0 && result == NULL ; --i) + { + tokenInfo *const token = Ancestors.list + i - 1; + if (token->type == TOKEN_IDENTIFIER && + token->tag != TAG_UNDEFINED && token->tag != TAG_INTERFACE) + result = token; + } + return result; +} + +/* Return the most recently pushed ancestor; the stack must not be empty. */ +static const tokenInfo* ancestorTop (void) +{ + Assert (Ancestors.count > 0); + return &Ancestors.list [Ancestors.count - 1]; +} + +#define ancestorCount() (Ancestors.count) + +/* Pop every ancestor and release the backing array. */ +static void ancestorClear (void) +{ + while (Ancestors.count > 0) + ancestorPop (); + if (Ancestors.list != NULL) + eFree (Ancestors.list); + Ancestors.list = NULL; + Ancestors.count = 0; + Ancestors.max = 0; +} + +/* Return TRUE when any ancestor on the stack is tagged TAG_INTERFACE. */ +static boolean insideInterface (void) +{ + boolean result = FALSE; + unsigned int i; + for (i = 0 ; i < Ancestors.count && !result ; ++i) + { + if (Ancestors.list [i].tag == TAG_INTERFACE) + result = TRUE; + } + return result; +} + +/* Register every FortranKeywordTable entry for Lang_fortran. */ +static void buildFortranKeywordHash (void) +{ + const size_t count = + sizeof (FortranKeywordTable) / sizeof (FortranKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &FortranKeywordTable [i]; + addKeyword (p->name, Lang_fortran, (int) p->id); + } +} + +/* +* Tag generation functions +*/ + +/* Allocate a blank token with an empty string, stamped with the current + * source line number and input file position. Release it with + * deleteToken (). + */ +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->tag = TAG_UNDEFINED; + token->string = vStringNew (); + token->secondary = NULL; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + return token; +} + +/* Allocate a copy of "token" that owns its own string and takes over the + * secondary token of the source. Release it with deleteToken (). + */ +static tokenInfo *newTokenFrom (tokenInfo *const token) +{ + /* Every field is overwritten by the structure copy below, so start from + * raw storage: going through newToken () would leak the vString that it + * allocates. + */ + tokenInfo *result = xMalloc (1, tokenInfo); + *result = *token; + result->string =
vStringNewCopy (token->string); + token->secondary = NULL; /* the copy received this pointer; clear it here so that the secondary token has a single owner */ + return result; +} + /* Free a token, its string and, recursively, its secondary token. NULL is accepted. */ +static void deleteToken (tokenInfo *const token) +{ + if (token != NULL) + { + vStringDelete (token->string); + deleteToken (token->secondary); + token->secondary = NULL; + eFree (token); + } +} + /* Label and local tags are the kinds treated as file scope. */ +static boolean isFileScope (const tagType type) +{ + return (boolean) (type == TAG_LABEL || type == TAG_LOCAL); +} + /* A kind is emitted when it is enabled in FortranKinds and, for file-scope kinds, Option.include.fileScope is set as well. */ +static boolean includeTag (const tagType type) +{ + boolean include; + Assert (type != TAG_UNDEFINED); + include = FortranKinds [(int) type].enabled; + if (include && isFileScope (type)) + include = Option.include.fileScope; + return include; +} + /* Record "tag" on the token and, when that kind is included, build and emit a tag entry named after the token text. */ +static void makeFortranTag (tokenInfo *const token, tagType tag) +{ + token->tag = tag; + if (includeTag (token->tag)) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + + initTagEntry (&e, name); + + if (token->tag == TAG_COMMON_BLOCK) + e.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.isFileScope = isFileScope (token->tag); + e.kindName = FortranKinds [token->tag].name; + e.kind = FortranKinds [token->tag].letter; + e.truncateLine = (boolean) (token->tag != TAG_LABEL); + + if (ancestorCount () > 0) + { + const tokenInfo* const scope = ancestorScope (); /* nearest ancestor usable as a scope, or NULL */ + if (scope != NULL) + { + e.extensionFields.scope [0] = FortranKinds [scope->tag].name; + e.extensionFields.scope [1] = vStringValue (scope->string); + } + } + if (!
insideInterface () || includeTag (TAG_INTERFACE)) /* inside an interface, emit only when the interface kind is enabled */ + makeTagEntry (&e); + } +} + +/* +* Parsing functions +*/ + /* Read and discard characters through the next newline; returns that newline, or EOF. */ +static int skipLine (void) +{ + int c; + + do + c = fileGetc (); + while (c != EOF && c != '\n'); + + return c; +} + /* Emit a TAG_LABEL tag named after "label", using a temporary token. */ +static void makeLabelTag (vString *const label) +{ + tokenInfo *token = newToken (); + token->type = TOKEN_LABEL; + vStringCopy (token->string, label); + makeFortranTag (token, TAG_LABEL); + deleteToken (token); +} + /* Classify the next fixed-form line from, at most, its first six characters. Digits met on the way are collected as a statement label and tagged before returning. */ +static lineType getLineType (void) +{ + vString *label = vStringNew (); + int column = 0; + lineType type = LTYPE_UNDETERMINED; + + do /* read in first 6 "margin" characters */ + { + int c = fileGetc (); + + /* 3.2.1 Comment_Line. A comment line is any line that contains + * a C or an asterisk in column 1, or contains only blank characters + * in columns 1 through 72. A comment line that contains a C or + * an asterisk in column 1 may contain any character capable of + * representation in the processor in columns 2 through 72. + */ + /* EXCEPTION! Some compilers permit '!' as a comment character here. + * + * Treat # and $ in column 1 as comment to permit preprocessor directives. + * Treat D and d in column 1 as comment for HP debug statements. + */ + if (column == 0 && strchr ("*Cc!#$Dd", c) != NULL) + type = LTYPE_COMMENT; + else if (c == '\t') /* EXCEPTION! Some compilers permit a tab here */ + { + column = 8; + type = LTYPE_INITIAL; + } + else if (column == 5) + { + /* 3.2.2 Initial_Line. An initial line is any line that is not + * a comment line and contains the character blank or the digit 0 + * in column 6. Columns 1 through 5 may contain a statement label + * (3.4), or each of the columns 1 through 5 must contain the + * character blank. + */ + if (c == ' ' || c == '0') + type = LTYPE_INITIAL; + + /* 3.2.3 Continuation_Line.
A continuation line is any line that + * contains any character of the FORTRAN character set other than + * the character blank or the digit 0 in column 6 and contains + * only blank characters in columns 1 through 5. + */ + else if (vStringLength (label) == 0) + type = LTYPE_CONTINUATION; + else + type = LTYPE_INVALID; + } + else if (c == ' ') + ; /* blank margin column: nothing to record, keep scanning */ + else if (c == EOF) + type = LTYPE_EOF; + else if (c == '\n') + type = LTYPE_SHORT; + else if (isdigit (c)) + vStringPut (label, c); /* statement label digit */ + else + type = LTYPE_INVALID; + + ++column; + } while (column < 6 && type == LTYPE_UNDETERMINED); + + Assert (type != LTYPE_UNDETERMINED); + + if (vStringLength (label) > 0) + { + vStringTerminate (label); + makeLabelTag (label); + } + vStringDelete (label); + return type; +} + /* Return the next character of fixed-form source. Column tracks the position in the current line; whenever it is 0 the loop near the end classifies the following line with getLineType () before a character is returned. */ +static int getFixedFormChar (void) +{ + boolean newline = FALSE; + lineType type; + int c = '\0'; + + if (Column > 0) + { +#ifdef STRICT_FIXED_FORM + /* EXCEPTION! Some compilers permit more than 72 characters per line. + */ + if (Column > 71) + c = skipLine (); + else +#endif + { + c = fileGetc (); + ++Column; + } + if (c == '\n') + { + newline = TRUE; /* need to check for continuation line */ + Column = 0; + } + else if (c == '!' && !
ParsingString) + { + c = skipLine (); + newline = TRUE; /* need to check for continuation line */ + Column = 0; + } + else if (c == '&') /* check for free source form */ + { + const int c2 = fileGetc (); + if (c2 == '\n') + longjmp (Exception, (int) ExceptionFixedFormat); /* '&' as the last character of a line: give up on fixed form */ + else + fileUngetc (c2); + } + } + while (Column == 0) + { + type = getLineType (); + switch (type) + { + case LTYPE_UNDETERMINED: + case LTYPE_INVALID: + longjmp (Exception, (int) ExceptionFixedFormat); + break; + + case LTYPE_SHORT: break; + case LTYPE_COMMENT: skipLine (); break; + + case LTYPE_EOF: + Column = 6; + if (newline) + c = '\n'; /* report the pending end of line first */ + else + c = EOF; + break; + + case LTYPE_INITIAL: + if (newline) + { + c = '\n'; + Column = 6; + break; + } + /* fall through to next case */ + case LTYPE_CONTINUATION: + Column = 5; + do + { + c = fileGetc (); + ++Column; + } while (isBlank (c)); + if (c == '\n') + Column = 0; + else if (Column > 6) + { + fileUngetc (c); /* blanks were skipped: return a single blank now and keep the real character for the next call */ + c = ' '; + } + break; + + default: + Assert ("Unexpected line type" == NULL); + } + } + return c; +} + /* Discard the rest of the current line and return the first character of the next one, or EOF. */ +static int skipToNextLine (void) +{ + int c = skipLine (); + if (c != EOF) + c = fileGetc (); + return c; +} + /* Return the next character of free-form source, stepping over '&' continuations and over comment lines. */ +static int getFreeFormChar (void) +{ + static boolean newline = TRUE; /* kept across calls: TRUE when the character returned last was a newline, and on the first call */ + boolean advanceLine = FALSE; + int c = fileGetc (); + + /* If the last nonblank, non-comment character of a FORTRAN 90 + * free-format text line is an ampersand then the next non-comment + * line is a continuation line. + */ + if (c == '&') + { + do + c = fileGetc (); + while (isspace (c) && c != '\n'); + if (c == '\n') + { + newline = TRUE; + advanceLine = TRUE; + } + else if (c == '!') + advanceLine = TRUE; + else + { + fileUngetc (c); /* not a continuation after all: hand back the '&' itself */ + c = '&'; + } + } + else if (newline && (c == '!' || c == '#')) + advanceLine = TRUE; + while (advanceLine) + { + while (isspace (c)) + c = fileGetc (); + if (c == '!'
|| (newline && c == '#')) + { + c = skipToNextLine (); + newline = TRUE; + continue; + } + if (c == '&') + c = fileGetc (); + else + advanceLine = FALSE; + } + newline = (boolean) (c == '\n'); + return c; +} + /* Return the pushed-back character if there is one, otherwise the next character from the reader for the current source form. */ +static int getChar (void) +{ + int c; + + if (Ungetc != '\0') + { + c = Ungetc; + Ungetc = '\0'; + } + else if (FreeSourceForm) + c = getFreeFormChar (); + else + c = getFixedFormChar (); + return c; +} + /* Push back one character for getChar (). There is a single slot, so a second call before the next getChar () replaces the first character. */ +static void ungetChar (const int c) +{ + Ungetc = c; +} + +/* If a numeric is passed in 'c', this is used as the first digit of the + * numeric being parsed. + * + * A '-' passed in 'c' is kept as a sign; any other non-digit is dropped and + * reading starts with the next character. After the digits, a '_' and the + * letters following it are consumed without being stored. The caller + * releases the returned string with vStringDelete (). + */ +static vString *parseInteger (int c) +{ + vString *string = vStringNew (); + + if (c == '-') + { + vStringPut (string, c); + c = getChar (); + } + else if (! isdigit (c)) + c = getChar (); + while (c != EOF && isdigit (c)) + { + vStringPut (string, c); + c = getChar (); + } + vStringTerminate (string); + + if (c == '_') + { + do + c = getChar (); + while (c != EOF && isalpha (c)); + } + ungetChar (c); + + return string; +} + /* Read a numeric literal: integer part, optional '.' fraction and optional 'e'/'E' exponent. The caller releases the returned string. NOTE(review): the exponent digits come from parseInteger ('\0'), which looks for '-' only in the argument it is given, so a signed exponent does not appear to be absorbed - confirm. */ +static vString *parseNumeric (int c) +{ + vString *string = vStringNew (); + vString *integer = parseInteger (c); + vStringCopy (string, integer); + vStringDelete (integer); + + c = getChar (); + if (c == '.') + { + integer = parseInteger ('\0'); + vStringPut (string, c); + vStringCat (string, integer); + vStringDelete (integer); + c = getChar (); + } + if (tolower (c) == 'e') + { + integer = parseInteger ('\0'); + vStringPut (string, c); + vStringCat (string, integer); + vStringDelete (integer); + } + else + ungetChar (c); + + vStringTerminate (string); + + return string; +} + /* Append the characters up to, but not including, "delimiter" to "string". ParsingString is raised for the duration of the scan. */ +static void parseString (vString *const string, const int delimiter) +{ + const unsigned long inputLineNumber = getInputLineNumber (); + int c; + ParsingString = TRUE; + c = getChar (); + while (c != delimiter && c != '\n' && c != EOF) + { + vStringPut (string, c); + c = getChar (); + } + if (c == '\n' || c == EOF) + { + verbose ("%s: unterminated character string at line %lu\n", + getInputFileName
(), inputLineNumber); + /* Both longjmp () calls below leave this function for good, so drop + * the flag first; otherwise it stays TRUE and fixed-form reading keeps + * treating '!' as string content. + */ + ParsingString = FALSE; + if (c == EOF) + longjmp (Exception, (int) ExceptionEOF); + else if (! FreeSourceForm) + longjmp (Exception, (int) ExceptionFixedFormat); + } + vStringTerminate (string); + ParsingString = FALSE; +} + +/* Read an identifier beginning with "firstChar" and place it into "string". + * The first character that is not part of the identifier is pushed back. + */ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + + do + { + vStringPut (string, c); + c = getChar (); + } while (isident (c)); + + vStringTerminate (string); + ungetChar (c); /* unget non-identifier character */ +} + +/* Skip blanks, then read up to five digits as a statement label and emit a + * TAG_LABEL tag for it. The first character that does not belong to the + * label is pushed back. + */ +static void checkForLabel (void) +{ + tokenInfo* token = NULL; + int length; + int c; + + do + c = getChar (); + while (isBlank (c)); + + for (length = 0 ; isdigit (c) && length < 5 ; ++length) + { + if (token == NULL) + { + token = newToken (); + token->type = TOKEN_LABEL; + } + vStringPut (token->string, c); + c = getChar (); + } + if (length > 0 && token != NULL) + { + vStringTerminate (token->string); + makeFortranTag (token, TAG_LABEL); + deleteToken (token); + } + ungetChar (c); +} + +/* Read an identifier that starts with "c" into the token and classify it + * as a keyword or as a plain identifier. + */ +static void readIdentifier (tokenInfo *const token, const int c) +{ + parseIdentifier (token->string, c); + token->keyword = analyzeToken (token->string, Lang_fortran); + if (!
isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_KEYWORD; + else + { + token->type = TOKEN_IDENTIFIER; + if (strncmp (vStringValue (token->string), "end", 3) == 0) + { + vString *const sub = vStringNewInit (vStringValue (token->string) + 3); + const keywordId kw = analyzeToken (sub, Lang_fortran); + vStringDelete (sub); + if (kw != KEYWORD_NONE) + { + token->secondary = newToken (); + token->secondary->type = TOKEN_KEYWORD; + token->secondary->keyword = kw; + token->keyword = KEYWORD_end; + } + } + } +} + +static void readToken (tokenInfo *const token) +{ + int c; + + deleteToken (token->secondary); + token->type = TOKEN_UNDEFINED; + token->tag = TAG_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->secondary = NULL; + vStringClear (token->string); + +getNextChar: + c = getChar (); + + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + switch (c) + { + case EOF: longjmp (Exception, (int) ExceptionEOF); break; + case ' ': goto getNextChar; + case '\t': goto getNextChar; + case ',': token->type = TOKEN_COMMA; break; + case '(': token->type = TOKEN_PAREN_OPEN; break; + case ')': token->type = TOKEN_PAREN_CLOSE; break; + case '%': token->type = TOKEN_PERCENT; break; + + case '*': + case '/': + case '+': + case '-': + case '=': + case '<': + case '>': + { + const char *const operatorChars = "*/+=<>"; + do { + vStringPut (token->string, c); + c = getChar (); + } while (strchr (operatorChars, c) != NULL); + ungetChar (c); + vStringTerminate (token->string); + token->type = TOKEN_OPERATOR; + break; + } + + case '!': + if (FreeSourceForm) + { + do + c = getChar (); + while (c != '\n' && c != EOF); + } + else + { + skipLine (); + Column = 0; + } + /* fall through to newline case */ + case '\n': + token->type = TOKEN_STATEMENT_END; + if (FreeSourceForm) + checkForLabel (); + break; + + case '.': + parseIdentifier (token->string, c); + c = getChar (); + if (c == '.') + { + vStringPut (token->string, c); + vStringTerminate 
(token->string); + token->type = TOKEN_OPERATOR; + } + else + { + ungetChar (c); + token->type = TOKEN_UNDEFINED; + } + break; + + case '"': + case '\'': + parseString (token->string, c); + token->type = TOKEN_STRING; + break; + + case ';': + token->type = TOKEN_STATEMENT_END; + break; + + case ':': + c = getChar (); + if (c == ':') + token->type = TOKEN_DOUBLE_COLON; + else + { + ungetChar (c); + token->type = TOKEN_UNDEFINED; + } + break; + + default: + if (isalpha (c)) + readIdentifier (token, c); + else if (isdigit (c)) + { + vString *numeric = parseNumeric (c); + vStringCat (token->string, numeric); + vStringDelete (numeric); + token->type = TOKEN_NUMERIC; + } + else + token->type = TOKEN_UNDEFINED; + break; + } +} + +static void readSubToken (tokenInfo *const token) +{ + if (token->secondary == NULL) + { + token->secondary = newToken (); + readToken (token->secondary); + } +} + +/* +* Scanning functions +*/ + +static void skipToToken (tokenInfo *const token, tokenType type) +{ + while (! isType (token, type) && ! isType (token, TOKEN_STATEMENT_END) && + !(token->secondary != NULL && isType (token->secondary, TOKEN_STATEMENT_END))) + readToken (token); +} + +static void skipPast (tokenInfo *const token, tokenType type) +{ + skipToToken (token, type); + if (! isType (token, TOKEN_STATEMENT_END)) + readToken (token); +} + +static void skipToNextStatement (tokenInfo *const token) +{ + do + { + skipToToken (token, TOKEN_STATEMENT_END); + readToken (token); + } while (isType (token, TOKEN_STATEMENT_END)); +} + +/* skip over parenthesis enclosed contents starting at next token. + * Token is left at the first token following closing parenthesis. If an + * opening parenthesis is not found, `token' is moved to the end of the + * statement. 
+ */ +static void skipOverParens (tokenInfo *const token) +{ + int level = 0; /* nesting depth of the parentheses seen so far */ + do { + if (isType (token, TOKEN_STATEMENT_END)) + break; + else if (isType (token, TOKEN_PAREN_OPEN)) + ++level; + else if (isType (token, TOKEN_PAREN_CLOSE)) + --level; + readToken (token); + } while (level > 0); /* a first token that is not an opening parenthesis leaves level at or below zero, so only that one token is consumed */ +} + +/* TRUE when the keyword of token is one of the type-spec keywords listed in the switch below. */ static boolean isTypeSpec (tokenInfo *const token) +{ + boolean result; + switch (token->keyword) + { + case KEYWORD_byte: + case KEYWORD_integer: + case KEYWORD_real: + case KEYWORD_double: + case KEYWORD_complex: + case KEYWORD_character: + case KEYWORD_logical: + case KEYWORD_record: + case KEYWORD_type: + result = TRUE; + break; + default: + result = FALSE; + break; + } + return result; +} + +/* TRUE when the keyword of token is one of the subprogram prefix keywords listed in the switch below; callers step over such a token with a single readToken. */ static boolean isSubprogramPrefix (tokenInfo *const token) +{ + boolean result; + switch (token->keyword) + { + case KEYWORD_elemental: + case KEYWORD_pure: + case KEYWORD_recursive: + case KEYWORD_stdcall: + result = TRUE; + break; + default: + result = FALSE; + break; + } + return result; +} + +/* type-spec + * is INTEGER [kind-selector] + * or REAL [kind-selector] is ( etc.
) + * or DOUBLE PRECISION + * or COMPLEX [kind-selector] + * or CHARACTER [kind-selector] + * or LOGICAL [kind-selector] + * or TYPE ( type-name ) + * + * Note that INTEGER and REAL may be followed by "*N" where "N" is an integer + */ +static void parseTypeSpec (tokenInfo *const token) +{ + /* parse type-spec, leaving `token' at first token following type-spec */ + Assert (isTypeSpec (token)); + switch (token->keyword) + { + case KEYWORD_character: + /* skip char-selector */ + readToken (token); + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "*") == 0) + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); + else if (isType (token, TOKEN_NUMERIC)) + readToken (token); + break; + + + case KEYWORD_byte: + case KEYWORD_complex: + case KEYWORD_integer: + case KEYWORD_logical: + case KEYWORD_real: + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); /* skip kind-selector */ + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "*") == 0) + { + readToken (token); /* move onto the length N */ + readToken (token); /* move past it */ + } + break; + + case KEYWORD_double: + readToken (token); + if (isKeyword (token, KEYWORD_complex) || + isKeyword (token, KEYWORD_precision)) + readToken (token); + else + skipToToken (token, TOKEN_STATEMENT_END); + break; + + case KEYWORD_record: + readToken (token); + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "/") == 0) + { + readToken (token); /* skip to structure name */ + readToken (token); /* skip to '/' */ + readToken (token); /* skip to variable name */ + } + break; + + case KEYWORD_type: + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); /* skip type-name */ + else /* TYPE not followed by a parenthesis introduces a derived-type definition */ + parseDerivedTypeDef (token); + break; + + default: + skipToToken (token, TOKEN_STATEMENT_END); + break; + } +} + +/* When token has the given keyword, skips to the next statement and returns TRUE; otherwise leaves token alone and returns FALSE. */ static boolean skipStatementIfKeyword (tokenInfo *const token, keywordId keyword) +{ + boolean result = FALSE;
+ if (isKeyword (token, keyword)) + { + result = TRUE; + skipToNextStatement (token); + } + return result; +} + +/* parse a list of qualifying specifiers, leaving `token' at first token + * following list. Examples of such specifiers are: + * [[, attr-spec] ::] + * [[, component-attr-spec-list] ::] + * + * attr-spec + * is PARAMETER + * or access-spec (is PUBLIC or PRIVATE) + * or ALLOCATABLE + * or DIMENSION ( array-spec ) + * or EXTERNAL + * or INTENT ( intent-spec ) + * or INTRINSIC + * or OPTIONAL + * or POINTER + * or SAVE + * or TARGET + * + * component-attr-spec + * is POINTER + * or DIMENSION ( component-array-spec ) + */ +static void parseQualifierSpecList (tokenInfo *const token) +{ + do + { + readToken (token); /* should be an attr-spec */ + switch (token->keyword) + { + case KEYWORD_parameter: + case KEYWORD_allocatable: + case KEYWORD_external: + case KEYWORD_intrinsic: + case KEYWORD_optional: + case KEYWORD_private: + case KEYWORD_pointer: + case KEYWORD_public: + case KEYWORD_save: + case KEYWORD_target: + readToken (token); + break; + + case KEYWORD_dimension: + case KEYWORD_intent: + readToken (token); + skipOverParens (token); + break; + + /* unrecognized specifier: give up on the rest of the statement */ default: skipToToken (token, TOKEN_STATEMENT_END); break; + } + } while (isType (token, TOKEN_COMMA)); /* each comma introduces another specifier */ + if (!
isType (token, TOKEN_DOUBLE_COLON)) + skipToToken (token, TOKEN_STATEMENT_END); +} + +/* Picks the tag kind for a declared entity from the tag of the innermost ancestor: component inside a derived type, local inside a function or subroutine, otherwise variable. */ static tagType variableTagType (void) +{ + tagType result = TAG_VARIABLE; + if (ancestorCount () > 0) + { + const tokenInfo* const parent = ancestorTop (); + switch (parent->tag) + { + case TAG_MODULE: result = TAG_VARIABLE; break; + case TAG_DERIVED_TYPE: result = TAG_COMPONENT; break; + case TAG_FUNCTION: result = TAG_LOCAL; break; + case TAG_SUBROUTINE: result = TAG_LOCAL; break; + default: result = TAG_VARIABLE; break; + } + } + return result; +} + +/* Tags the entity named by token, then skips its array spec, char-length and initializer. */ static void parseEntityDecl (tokenInfo *const token) +{ + Assert (isType (token, TOKEN_IDENTIFIER)); + makeFortranTag (token, variableTagType ()); + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "*") == 0) + { + readToken (token); /* read char-length */ + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); + else + readToken (token); + } + if (isType (token, TOKEN_OPERATOR)) + { + if (strcmp (vStringValue (token->string), "/") == 0) + { /* skip over initializations of structure field */ + readToken (token); + skipPast (token, TOKEN_OPERATOR); + } + else if (strcmp (vStringValue (token->string), "=") == 0) + { /* skip the initialization expression; parenthesized parts are stepped over whole, so commas inside them do not end the skip */ + while (! isType (token, TOKEN_COMMA) && + !
isType (token, TOKEN_STATEMENT_END)) + { + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); + } + } + } + /* token left at either comma or statement end */ +} + +/* Parses a comma-separated list of entity declarations, tagging each one; a list that starts with a percent sign is skipped as a whole statement. */ static void parseEntityDeclList (tokenInfo *const token) +{ + if (isType (token, TOKEN_PERCENT)) + skipToNextStatement (token); + else while (isType (token, TOKEN_IDENTIFIER) || + (isType (token, TOKEN_KEYWORD) && + !isKeyword (token, KEYWORD_function) && + !isKeyword (token, KEYWORD_subroutine))) /* FUNCTION and SUBROUTINE are excluded, presumably so that a type-spec used as a subprogram prefix is left for the caller */ + { + /* compilers accept keywords as identifiers */ + if (isType (token, TOKEN_KEYWORD)) + token->type = TOKEN_IDENTIFIER; + parseEntityDecl (token); + if (isType (token, TOKEN_COMMA)) + readToken (token); + else if (isType (token, TOKEN_STATEMENT_END)) + { + skipToNextStatement (token); + break; + } + } +} + +/* type-declaration-stmt is + * type-spec [[, attr-spec] ... ::] entity-decl-list + */ +static void parseTypeDeclarationStmt (tokenInfo *const token) +{ + Assert (isTypeSpec (token)); + parseTypeSpec (token); + if (!isType (token, TOKEN_STATEMENT_END)) /* if not end of derived type... */ + { + if (isType (token, TOKEN_COMMA)) + parseQualifierSpecList (token); + if (isType (token, TOKEN_DOUBLE_COLON)) + readToken (token); + parseEntityDeclList (token); + } + if (isType (token, TOKEN_STATEMENT_END)) + skipToNextStatement (token); +} + +/* namelist-stmt is + * NAMELIST /namelist-group-name/ namelist-group-object-list + * [[,]/[namelist-group-name]/ namelist-block-object-list] ... + * + * namelist-group-object is + * variable-name + * + * common-stmt is + * COMMON [/[common-block-name]/] common-block-object-list + * [[,]/[common-block-name]/ common-block-object-list] ...
+ * + * common-block-object is + * variable-name [ ( explicit-shape-spec-list ) ] + */ +static void parseCommonNamelistStmt (tokenInfo *const token, tagType type) +{ + Assert (isKeyword (token, KEYWORD_common) || + isKeyword (token, KEYWORD_namelist)); + readToken (token); + do + { + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "/") == 0) + { + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + { + makeFortranTag (token, type); /* the name between the slashes is tagged with the kind passed by the caller */ + readToken (token); + } + skipPast (token, TOKEN_OPERATOR); + } + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_LOCAL); /* each listed object is tagged as a local */ + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + skipOverParens (token); /* skip explicit-shape-spec-list */ + if (isType (token, TOKEN_COMMA)) + readToken (token); + } while (! isType (token, TOKEN_STATEMENT_END)); + skipToNextStatement (token); +} + +/* Parses one statement inside a STRUCTURE or MAP body: a typed declaration, a nested STRUCTURE or a UNION; anything else is skipped. */ static void parseFieldDefinition (tokenInfo *const token) +{ + if (isTypeSpec (token)) + parseTypeDeclarationStmt (token); + else if (isKeyword (token, KEYWORD_structure)) + parseStructureStmt (token); + else if (isKeyword (token, KEYWORD_union)) + parseUnionStmt (token); + else + skipToNextStatement (token); +} + +/* Parses MAP through END MAP, handing each statement of the body to parseFieldDefinition; token is left on the statement after the END. */ static void parseMap (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_map)); + skipToNextStatement (token); + while (! isKeyword (token, KEYWORD_end)) + parseFieldDefinition (token); + readSubToken (token); + /* secondary token should be KEYWORD_map token */ + skipToNextStatement (token); +} + +/* UNION + * MAP + * [field-definition] [field-definition] ... + * END MAP + * MAP + * [field-definition] [field-definition] ... + * END MAP + * [MAP + * [field-definition] + * [field-definition] ... + * END MAP] ... + * END UNION + * * + * + * Typed data declarations (variables or arrays) in structure declarations + * have the form of normal Fortran typed data declarations. Data items with + * different types can be freely intermixed within a structure declaration.
+ * + * Unnamed fields can be declared in a structure by specifying the pseudo + * name %FILL in place of an actual field name. You can use this mechanism to + * generate empty space in a record for purposes such as alignment. + * + * All mapped field declarations that are made within a UNION declaration + * share a common location within the containing structure. When initializing + * the fields within a UNION, the final initialization value assigned + * overlays any value previously assigned to a field definition that shares + * that field. + */ +/* Parses UNION through END UNION: every MAP block is handed to parseMap, then the closing END statement is skipped. */ static void parseUnionStmt (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_union)); + skipToNextStatement (token); + while (isKeyword (token, KEYWORD_map)) + parseMap (token); + /* should be at KEYWORD_end token */ + readSubToken (token); + /* secondary token should be KEYWORD_union token */ + skipToNextStatement (token); +} + +/* STRUCTURE [/structure-name/] [field-names] + * [field-definition] + * [field-definition] ... + * END STRUCTURE + * + * structure-name + * identifies the structure in a subsequent RECORD statement. + * Substructures can be established within a structure by means of either + * a nested STRUCTURE declaration or a RECORD statement. + * + * field-names + * (for substructure declarations only) one or more names having the + * structure of the substructure being defined. + * + * field-definition + * can be one or more of the following: + * + * Typed data declarations, which can optionally include one or more + * data initialization values. + * + * Substructure declarations (defined by either RECORD statements or + * subsequent STRUCTURE statements). + * + * UNION declarations, which are mapped fields defined by a block of + * statements. The syntax of a UNION declaration is described below. + * + * PARAMETER statements, which do not affect the form of the + * structure.
+ */ +static void parseStructureStmt (tokenInfo *const token) +{ + tokenInfo *name; /* separate token object pushed as the ancestor while the body is parsed; deleted before returning */ + Assert (isKeyword (token, KEYWORD_structure)); + readToken (token); + if (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "/") == 0) + { /* read structure name */ + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_DERIVED_TYPE); + name = newTokenFrom (token); /* taken even when the token after the slash is not an identifier */ + skipPast (token, TOKEN_OPERATOR); + } + else + { /* fake out anonymous structure */ + name = newToken (); + name->type = TOKEN_IDENTIFIER; + name->tag = TAG_DERIVED_TYPE; + vStringCopyS (name->string, "anonymous"); + } + while (isType (token, TOKEN_IDENTIFIER)) + { /* read field names */ + makeFortranTag (token, TAG_COMPONENT); + readToken (token); + if (isType (token, TOKEN_COMMA)) + readToken (token); + } + skipToNextStatement (token); + ancestorPush (name); /* name is the enclosing scope for every field definition parsed below */ + while (! isKeyword (token, KEYWORD_end)) + parseFieldDefinition (token); + readSubToken (token); + /* secondary token should be KEYWORD_structure token */ + skipToNextStatement (token); + ancestorPop (); + deleteToken (name); +} + +/* specification-stmt + * is access-stmt (is access-spec [[::] access-id-list) + * or allocatable-stmt (is ALLOCATABLE [::] array-name etc.) + * or common-stmt (is COMMON [ / [common-block-name] /] etc.) + * or data-stmt (is DATA data-stmt-list [[,] data-stmt-set] ...) + * or dimension-stmt (is DIMENSION [::] array-name etc.) + * or equivalence-stmt (is EQUIVALENCE equivalence-set-list) + * or external-stmt (is EXTERNAL etc.) + * or intent-stmt (is INTENT ( intent-spec ) [::] etc.) + * or instrinsic-stmt (is INTRINSIC etc.) + * or namelist-stmt (is NAMELIST / namelist-group-name / etc.) + * or optional-stmt (is OPTIONAL [::] etc.) + * or pointer-stmt (is POINTER [::] object-name etc.) + * or save-stmt (is SAVE etc.) + * or target-stmt (is TARGET [::] object-name etc.)
+ * + * access-spec is PUBLIC or PRIVATE + */ +static boolean parseSpecificationStmt (tokenInfo *const token) +{ + boolean result = TRUE; /* FALSE only when the keyword is not one of the statements handled here */ + switch (token->keyword) + { + case KEYWORD_common: + parseCommonNamelistStmt (token, TAG_COMMON_BLOCK); + break; + + case KEYWORD_namelist: + parseCommonNamelistStmt (token, TAG_NAMELIST); + break; + + case KEYWORD_structure: + parseStructureStmt (token); + break; + + /* the statements below produce no tags and are skipped whole */ case KEYWORD_allocatable: + case KEYWORD_data: + case KEYWORD_dimension: + case KEYWORD_equivalence: + case KEYWORD_external: + case KEYWORD_intent: + case KEYWORD_intrinsic: + case KEYWORD_optional: + case KEYWORD_pointer: + case KEYWORD_private: + case KEYWORD_public: + case KEYWORD_save: + case KEYWORD_target: + skipToNextStatement (token); + break; + + default: + result = FALSE; + break; + } + return result; +} + +/* component-def-stmt is + * type-spec [[, component-attr-spec-list] ::] component-decl-list + * + * component-decl is + * component-name [ ( component-array-spec ) ] [ * char-length ] + */ +static void parseComponentDefStmt (tokenInfo *const token) +{ + Assert (isTypeSpec (token)); + parseTypeSpec (token); + if (isType (token, TOKEN_COMMA)) + parseQualifierSpecList (token); + if (isType (token, TOKEN_DOUBLE_COLON)) + readToken (token); + parseEntityDeclList (token); +} + +/* derived-type-def is + * derived-type-stmt is (TYPE [[, access-spec] ::] type-name + * [private-sequence-stmt] ... (is PRIVATE or SEQUENCE) + * component-def-stmt + * [component-def-stmt] ... + * end-type-stmt + */ +static void parseDerivedTypeDef (tokenInfo *const token) +{ + if (isType (token, TOKEN_COMMA)) + parseQualifierSpecList (token); + if (isType (token, TOKEN_DOUBLE_COLON)) + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_DERIVED_TYPE); + ancestorPush (token); /* NOTE(review): token is advanced right after this push, so the ancestor stack presumably stores a copy - confirm */ + skipToNextStatement (token); + if (isKeyword (token, KEYWORD_private) || + isKeyword (token, KEYWORD_sequence)) + { + skipToNextStatement (token); + } /* only one PRIVATE or SEQUENCE statement is skipped here; a further one is skipped by the else branch of the loop below */ + while (!
isKeyword (token, KEYWORD_end)) + { + if (isTypeSpec (token)) + parseComponentDefStmt (token); + else + skipToNextStatement (token); + } + readSubToken (token); + /* secondary token should be KEYWORD_type token */ + skipToToken (token, TOKEN_STATEMENT_END); /* token is left at the statement end rather than stepped past it */ + ancestorPop (); +} + +/* interface-block + * interface-stmt (is INTERFACE [generic-spec]) + * [interface-body] + * [module-procedure-stmt] ... + * end-interface-stmt (is END INTERFACE) + * + * generic-spec + * is generic-name + * or OPERATOR ( defined-operator ) + * or ASSIGNMENT ( = ) + * + * interface-body + * is function-stmt + * [specification-part] + * end-function-stmt + * or subroutine-stmt + * [specification-part] + * end-subroutine-stmt + * + * module-procedure-stmt is + * MODULE PROCEDURE procedure-name-list + */ +static void parseInterfaceBlock (tokenInfo *const token) +{ + tokenInfo *name = NULL; + Assert (isKeyword (token, KEYWORD_interface)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + { + makeFortranTag (token, TAG_INTERFACE); + name = newTokenFrom (token); + } + else if (isKeyword (token, KEYWORD_assignment) || + isKeyword (token, KEYWORD_operator)) + { + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + readToken (token); + if (isType (token, TOKEN_OPERATOR)) + { + makeFortranTag (token, TAG_INTERFACE); + name = newTokenFrom (token); + } + } + if (name == NULL) + { /* unnamed interface: a placeholder token is pushed as the ancestor */ + name = newToken (); + name->type = TOKEN_IDENTIFIER; + name->tag = TAG_INTERFACE; + } + ancestorPush (name); + while (!
isKeyword (token, KEYWORD_end)) + { + switch (token->keyword) + { + case KEYWORD_function: parseFunctionSubprogram (token); break; + case KEYWORD_subroutine: parseSubroutineSubprogram (token); break; + + default: /* a prefix keyword or type-spec is stepped over so that the keyword after it is examined on the next pass */ + if (isSubprogramPrefix (token)) + readToken (token); + else if (isTypeSpec (token)) + parseTypeSpec (token); + else + skipToNextStatement (token); + break; + } + } + readSubToken (token); + /* secondary token should be KEYWORD_interface token */ + skipToNextStatement (token); + ancestorPop (); + deleteToken (name); +} + +/* entry-stmt is + * ENTRY entry-name [ ( dummy-arg-list ) ] + */ +static void parseEntryStmt (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_entry)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_ENTRY_POINT); + skipToNextStatement (token); +} + +/* stmt-function-stmt is + * function-name ([dummy-arg-name-list]) = scalar-expr + */ +static boolean parseStmtFunctionStmt (tokenInfo *const token) +{ + boolean result = FALSE; /* TRUE when a parenthesized list followed by an equals operator was seen; the statement is skipped either way */ + Assert (isType (token, TOKEN_IDENTIFIER)); +#if 0 /* cannot reliably parse this yet */ + makeFortranTag (token, TAG_FUNCTION); +#endif + readToken (token); + if (isType (token, TOKEN_PAREN_OPEN)) + { + skipOverParens (token); + result = (boolean) (isType (token, TOKEN_OPERATOR) && + strcmp (vStringValue (token->string), "=") == 0); + } + skipToNextStatement (token); + return result; +} + +/* TRUE for the declaration keywords listed below, which parseDeclarationConstruct skips without tagging. */ static boolean isIgnoredDeclaration (tokenInfo *const token) +{ + boolean result; + switch (token->keyword) + { + case KEYWORD_cexternal: + case KEYWORD_cglobal: + case KEYWORD_dllexport: + case KEYWORD_dllimport: + case KEYWORD_external: + case KEYWORD_format: + case KEYWORD_include: + case KEYWORD_inline: + case KEYWORD_parameter: + case KEYWORD_pascal: + case KEYWORD_pexternal: + case KEYWORD_pglobal: + case KEYWORD_static: + case KEYWORD_value: + case KEYWORD_virtual: + case KEYWORD_volatile: + result = TRUE; + break; + + default: + result = FALSE; + break; + } + return
result; +} + +/* declaration-construct + * [derived-type-def] + * [interface-block] + * [type-declaration-stmt] + * [specification-stmt] + * [parameter-stmt] (is PARAMETER ( named-constant-def-list ) + * [format-stmt] (is FORMAT format-specification) + * [entry-stmt] + * [stmt-function-stmt] + */ +static boolean parseDeclarationConstruct (tokenInfo *const token) +{ + boolean result = TRUE; /* a FALSE return ends the declaration loop in parseSpecificationPart */ + switch (token->keyword) + { + case KEYWORD_entry: parseEntryStmt (token); break; + case KEYWORD_interface: parseInterfaceBlock (token); break; + case KEYWORD_stdcall: readToken (token); break; + /* derived type handled by parseTypeDeclarationStmt(); */ + + case KEYWORD_automatic: + readToken (token); + if (isTypeSpec (token)) + parseTypeDeclarationStmt (token); + else + skipToNextStatement (token); + result = TRUE; + break; + + default: + if (isIgnoredDeclaration (token)) + skipToNextStatement (token); + else if (isTypeSpec (token)) + { + parseTypeDeclarationStmt (token); + result = TRUE; + } + else if (isType (token, TOKEN_IDENTIFIER)) + result = parseStmtFunctionStmt (token); /* the statement has been consumed even when this returns FALSE */ + else + result = parseSpecificationStmt (token); + break; + } + return result; +} + +/* implicit-part-stmt + * is [implicit-stmt] (is IMPLICIT etc.) + * or [parameter-stmt] (is PARAMETER etc.) + * or [format-stmt] (is FORMAT etc.) + * or [entry-stmt] (is ENTRY entry-name etc.) + */ +static boolean parseImplicitPartStmt (tokenInfo *const token) +{ + boolean result = TRUE; + switch (token->keyword) + { + case KEYWORD_entry: parseEntryStmt (token); break; + + case KEYWORD_implicit: + case KEYWORD_include: + case KEYWORD_parameter: + case KEYWORD_format: + skipToNextStatement (token); + break; + + default: result = FALSE; break; + } + return result; +} + +/* specification-part is + * [use-stmt] ... (is USE module-name etc.) + * [implicit-part] (is [implicit-part-stmt] ... [implicit-stmt]) + * [declaration-construct] ...
+ */ +static boolean parseSpecificationPart (tokenInfo *const token) +{ + boolean result = FALSE; /* becomes TRUE once at least one statement has been consumed */ + while (skipStatementIfKeyword (token, KEYWORD_use)) + result = TRUE; + while (parseImplicitPartStmt (token)) + result = TRUE; + while (parseDeclarationConstruct (token)) + result = TRUE; + return result; +} + +/* block-data is + * block-data-stmt (is BLOCK DATA [block-data-name] + * [specification-part] + * end-block-data-stmt (is END [BLOCK DATA [block-data-name]]) + */ +static void parseBlockData (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_block)); + readToken (token); + if (isKeyword (token, KEYWORD_data)) + { + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_BLOCK_DATA); + } + ancestorPush (token); + skipToNextStatement (token); + parseSpecificationPart (token); + while (! isKeyword (token, KEYWORD_end)) + skipToNextStatement (token); + readSubToken (token); + /* secondary token should be KEYWORD_NONE or KEYWORD_block token */ + skipToNextStatement (token); + ancestorPop (); +} + +/* internal-subprogram-part is + * contains-stmt (is CONTAINS) + * internal-subprogram + * [internal-subprogram] ... + * + * internal-subprogram + * is function-subprogram + * or subroutine-subprogram + */ +static void parseInternalSubprogramPart (tokenInfo *const token) +{ + boolean done = FALSE; + if (isKeyword (token, KEYWORD_contains)) + skipToNextStatement (token); + do + { + switch (token->keyword) + { + case KEYWORD_function: parseFunctionSubprogram (token); break; + case KEYWORD_subroutine: parseSubroutineSubprogram (token); break; + case KEYWORD_end: done = TRUE; break; /* the END token itself is left for the caller */ + + default: + if (isSubprogramPrefix (token)) + readToken (token); + else if (isTypeSpec (token)) + parseTypeSpec (token); + else + readToken (token); + break; + } + } while (!
done); +} + +/* module is + * module-stmt (is MODULE module-name) + * [specification-part] + * [module-subprogram-part] + * end-module-stmt (is END [MODULE [module-name]]) + * + * module-subprogram-part + * contains-stmt (is CONTAINS) + * module-subprogram + * [module-subprogram] ... + * + * module-subprogram + * is function-subprogram + * or subroutine-subprogram + */ +static void parseModule (tokenInfo *const token) +{ + Assert (isKeyword (token, KEYWORD_module)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, TAG_MODULE); + ancestorPush (token); + skipToNextStatement (token); + parseSpecificationPart (token); + if (isKeyword (token, KEYWORD_contains)) + parseInternalSubprogramPart (token); + while (! isKeyword (token, KEYWORD_end)) /* skip any statement not handled above until END is reached */ + skipToNextStatement (token); + readSubToken (token); + /* secondary token should be KEYWORD_NONE or KEYWORD_module token */ + skipToNextStatement (token); + ancestorPop (); +} + +/* execution-part + * executable-construct + * + * executable-construct is + * execution-part-construct [execution-part-construct] + * + * execution-part-construct + * is executable-construct + * or format-stmt + * or data-stmt + * or entry-stmt + */ +static boolean parseExecutionPart (tokenInfo *const token) +{ + boolean result = FALSE; + boolean done = FALSE; + while (!
done) + { + switch (token->keyword) + { + default: + if (isSubprogramPrefix (token)) + readToken (token); + else + skipToNextStatement (token); + result = TRUE; + break; + + case KEYWORD_entry: + parseEntryStmt (token); + result = TRUE; + break; + + case KEYWORD_contains: + case KEYWORD_function: + case KEYWORD_subroutine: + done = TRUE; + break; + + case KEYWORD_end: /* END followed by DO, IF, SELECT or WHERE closes a construct and is skipped; any other END finishes the execution part */ + readSubToken (token); + if (isSecondaryKeyword (token, KEYWORD_do) || + isSecondaryKeyword (token, KEYWORD_if) || + isSecondaryKeyword (token, KEYWORD_select) || + isSecondaryKeyword (token, KEYWORD_where)) + { + skipToNextStatement (token); + result = TRUE; + } + else + done = TRUE; + break; + } + } + return result; +} + +/* Shared body for PROGRAM, FUNCTION and SUBROUTINE units: tags the unit name with the given tag kind and parses through the closing END statement. */ static void parseSubprogram (tokenInfo *const token, const tagType tag) +{ + Assert (isKeyword (token, KEYWORD_program) || + isKeyword (token, KEYWORD_function) || + isKeyword (token, KEYWORD_subroutine)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + makeFortranTag (token, tag); + ancestorPush (token); + skipToNextStatement (token); + parseSpecificationPart (token); + parseExecutionPart (token); + if (isKeyword (token, KEYWORD_contains)) + parseInternalSubprogramPart (token); + /* should be at KEYWORD_end token */ + readSubToken (token); + /* secondary token should be one of KEYWORD_NONE, KEYWORD_program, + * KEYWORD_function, KEYWORD_subroutine + */ + skipToNextStatement (token); + ancestorPop (); +} + + +/* function-subprogram is + * function-stmt (is [prefix] FUNCTION function-name etc.) + * [specification-part] + * [execution-part] + * [internal-subprogram-part] + * end-function-stmt (is END [FUNCTION [function-name]]) + * + * prefix + * is type-spec [RECURSIVE] + * or [RECURSIVE] type-spec + */ +static void parseFunctionSubprogram (tokenInfo *const token) +{ + parseSubprogram (token, TAG_FUNCTION); +} + +/* subroutine-subprogram is + * subroutine-stmt (is [RECURSIVE] SUBROUTINE subroutine-name etc.)
+ * [specification-part] + * [execution-part] + * [internal-subprogram-part] + * end-subroutine-stmt (is END [SUBROUTINE [function-name]]) + */ +static void parseSubroutineSubprogram (tokenInfo *const token) +{ + parseSubprogram (token, TAG_SUBROUTINE); +} + +/* main-program is + * [program-stmt] (is PROGRAM program-name) + * [specification-part] + * [execution-part] + * [internal-subprogram-part ] + * end-program-stmt + */ +static void parseMainProgram (tokenInfo *const token) +{ + parseSubprogram (token, TAG_PROGRAM); +} + +/* program-unit + * is main-program + * or external-subprogram (is function-subprogram or subroutine-subprogram) + * or module + * or block-data + */ +static void parseProgramUnit (tokenInfo *const token) +{ + readToken (token); + do + { + if (isType (token, TOKEN_STATEMENT_END)) + readToken (token); + else switch (token->keyword) + { + case KEYWORD_block: parseBlockData (token); break; + case KEYWORD_end: skipToNextStatement (token); break; + case KEYWORD_function: parseFunctionSubprogram (token); break; + case KEYWORD_module: parseModule (token); break; + case KEYWORD_program: parseMainProgram (token); break; + case KEYWORD_subroutine: parseSubroutineSubprogram (token); break; + + default: + if (isSubprogramPrefix (token)) + readToken (token); + else + { + boolean one = parseSpecificationPart (token); + boolean two = parseExecutionPart (token); + if (! (one || two)) /* neither part consumed anything: step one token so the loop always makes progress */ + readToken (token); + } + break; + } + } while (TRUE); /* never falls out of the loop; it is left only through a longjmp such as the one readToken performs at EOF */ +} + +/* Runs one parsing pass over the input: free source form when passCount is greater than 1, fixed form otherwise. Returns TRUE when a fixed-form pass raised ExceptionFixedFormat and the file should be retried as free form. */ static boolean findFortranTags (const unsigned int passCount) +{ + tokenInfo *token; + exception_t exception; + boolean retry; + + Assert (passCount < 3); + Parent = newToken (); + token = newToken (); + FreeSourceForm = (boolean) (passCount > 1); + Column = 0; + exception = (exception_t) setjmp (Exception); /* NOTE(review): ISO C lists only a few expression contexts in which setjmp may appear, and a cast-and-assign is not among them - consider testing the call directly */ + if (exception == ExceptionEOF) + retry = FALSE; + else if (exception == ExceptionFixedFormat && !
FreeSourceForm) + { + verbose ("%s: not fixed source form; retry as free source form\n", + getInputFileName ()); + retry = TRUE; + } + else + { /* normal path after the initial setjmp return */ + parseProgramUnit (token); + retry = FALSE; + } + ancestorClear (); + deleteToken (token); + deleteToken (Parent); + + return retry; +} + +/* Records the language id assigned to this parser and calls buildFortranKeywordHash. */ static void initialize (const langType language) +{ + Lang_fortran = language; + buildFortranKeywordHash (); +} + +/* Creates the Fortran parser definition: kinds, file extensions (upper-case variants only when CASE_INSENSITIVE_FILENAMES is not defined), the parser2 entry point and the initializer. */ extern parserDefinition* FortranParser (void) +{ + static const char *const extensions [] = { + "f", "for", "ftn", "f77", "f90", "f95", +#ifndef CASE_INSENSITIVE_FILENAMES + "F", "FOR", "FTN", "F77", "F90", "F95", +#endif + NULL + }; + parserDefinition* def = parserNew ("Fortran"); + def->kinds = FortranKinds; + def->kindCount = KIND_COUNT (FortranKinds); + def->extensions = extensions; + def->parser2 = findFortranTags; + def->initialize = initialize; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/general.h b/general.h new file mode 100644 index 0000000..2d1d629 --- /dev/null +++ b/general.h @@ -0,0 +1,127 @@ +/* +* $Id: general.h 508 2007-05-03 03:20:59Z dhiebert $ +* +* Copyright (c) 1998-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Provides the general (non-ctags-specific) environment assumed by all.
/*  general.h: the general (non-ctags-specific) environment assumed by every
 *  translation unit.  It selects the platform configuration header, supplies
 *  portability macros, defines the `boolean' type and declares a few
 *  prototypes that some systems fail to provide.
 */
#ifndef _GENERAL_H
#define _GENERAL_H

/*
*   INCLUDE FILES
*/
/* Exactly one configuration header is chosen: config.h when HAVE_CONFIG_H is
 * defined, otherwise the hand-written header of the detected platform.
 */
#if defined (HAVE_CONFIG_H)
# include <config.h>
#elif defined (AMIGA)
# include "e_amiga.h"
#elif defined (DJGPP)
# include "e_djgpp.h"
#elif defined (macintosh)
# include "e_mac.h"
#elif defined (MSDOS) || defined (WIN32)
# include "e_msoft.h"
#elif defined (OS2)
# include "e_os2.h"
#elif defined (QDOS)
# include "e_qdos.h"
#elif defined (RISCOS)
# include "e_riscos.h"
#elif defined (__vms) || defined (VMS)
# include "e_vms.h"
# ifndef VMS
#  define VMS 1
# endif
#endif


/*
*   MACROS
*/

/*  Define standard error destination
 */
#ifndef errout
# define errout	stderr
#endif

/*  Define regex if supported */
#if (defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN))
# define HAVE_REGEX 1
#endif

/*  This is a helpful internal feature of later versions (> 2.7) of GCC
 *  to prevent warnings about unused variables.
 */
#if (__GNUC__ > 2  ||  (__GNUC__ == 2  &&  __GNUC_MINOR__ >= 7)) && !defined (__GNUG__)
# define __unused__  __attribute__((unused))
# define __printf__(s,f)  __attribute__((format (printf, s, f)))
#else
# define __unused__
# define __printf__(s,f)
#endif

/*
 *  Portability macros
 */
/* Case-insensitive comparison: fall back to stricmp/strnicmp where available,
 * otherwise to the struppercmp/strnuppercmp replacements.
 */
#if !defined(HAVE_STRCASECMP) && !defined(strcasecmp)
# ifdef HAVE_STRICMP
#  define strcasecmp(s1,s2) stricmp(s1,s2)
# else
#  define strcasecmp(s1,s2) struppercmp(s1,s2)
# endif
#endif

#if !defined(HAVE_STRNCASECMP) && !defined(strncasecmp)
# ifdef HAVE_STRNICMP
#  define strncasecmp(s1,s2,n) strnicmp(s1,s2,n)
# else
#  define strncasecmp(s1,s2,n) strnuppercmp(s1,s2,n)
# endif
#endif

/*
*   DATA DECLARATIONS
*/

/* Any earlier FALSE/TRUE macros are discarded so the definitions below are
 * the only ones in effect.
 */
#undef FALSE
#undef TRUE
#ifdef VAXC
typedef enum { FALSE, TRUE } booleanType;
typedef int boolean;
#else
# ifdef __cplusplus
typedef bool boolean;
#define FALSE false
#define TRUE true
# else
typedef enum { FALSE, TRUE } boolean;
# endif
#endif

#if ! defined (HAVE_FGETPOS)  &&  ! defined (fpos_t)
# define fpos_t long
#endif

/*
*   FUNCTION PROTOTYPES
*/

#if defined (NEED_PROTO_REMOVE) && defined (HAVE_REMOVE)
extern int remove (const char *);
#endif

/* unlink () returns int (0 on success, -1 on failure), like remove (). */
#if defined (NEED_PROTO_UNLINK) && ! defined (HAVE_REMOVE)
extern int unlink (const char *);
#endif

#ifdef NEED_PROTO_GETENV
extern char *getenv (const char *);
#endif

#endif  /* _GENERAL_H */

/* vi:set tabstop=4 shiftwidth=4: */
/*  State of one nesting level of a preprocessor conditional
 *  (#if ... #elif ... #else ... #endif).
 */
typedef struct sConditionalInfo {
	boolean ignoreAllBranches;  /* ignoring parent conditional branch */
	boolean singleBranch;       /* choose only one branch */
	boolean branchChosen;       /* branch already selected */
	boolean ignoring;           /* current ignore state */
} conditionalInfo;

/*  Which directive, if any, is being processed on the current line.
 */
enum eState {
	DRCTV_NONE,    /* no known directive - ignore to end of line */
	DRCTV_DEFINE,  /* "#define" encountered */
	DRCTV_HASH,    /* initial '#' read; determine directive */
	DRCTV_IF,      /* "#if" or "#ifdef" encountered */
	DRCTV_PRAGMA,  /* #pragma encountered */
	DRCTV_UNDEF    /* "#undef" encountered */
};

/*  Defines the current state of the pre-processor.
 */
typedef struct sCppState {
	/* Pushed-back characters; '\0' marks an empty slot.  `ungetch' is handed
	 * out first, then `ungetch2' (see cppUngetc () and cppGetc ()).
	 */
	int		ungetch, ungetch2;   /* ungotten characters, if any */
	boolean resolveRequired;     /* must resolve if/else/elif/endif branch */
	boolean hasAtLiteralStrings; /* supports @"c:\" strings */
	struct sDirective {
		enum eState state;       /* current directive being processed */
		boolean	accept;          /* is a directive syntactically permitted? */
		vString * name;          /* macro name */
		/* Index into ifdef[].  0 means "outside every conditional"; its
		 * entry ifdef[0] is initialised by cppInit () and read by
		 * isIgnore (), and pushConditional () increments the level before
		 * writing a new entry.
		 */
		unsigned int nestLevel;
		conditionalInfo ifdef [MaxCppNestingLevel];
	} directive;
} cppState;
+ */ +static boolean BraceFormat = FALSE; + +static cppState Cpp = { + '\0', '\0', /* ungetch characters */ + FALSE, /* resolveRequired */ + FALSE, /* hasAtLiteralStrings */ + { + DRCTV_NONE, /* state */ + FALSE, /* accept */ + NULL, /* tag name */ + 0, /* nestLevel */ + { {FALSE,FALSE,FALSE,FALSE} } /* ifdef array */ + } /* directive */ +}; + +/* +* FUNCTION DEFINITIONS +*/ + +extern boolean isBraceFormat (void) +{ + return BraceFormat; +} + +extern unsigned int getDirectiveNestLevel (void) +{ + return Cpp.directive.nestLevel; +} + +extern void cppInit (const boolean state, const boolean hasAtLiteralStrings) +{ + BraceFormat = state; + + Cpp.ungetch = '\0'; + Cpp.ungetch2 = '\0'; + Cpp.resolveRequired = FALSE; + Cpp.hasAtLiteralStrings = hasAtLiteralStrings; + + Cpp.directive.state = DRCTV_NONE; + Cpp.directive.accept = TRUE; + Cpp.directive.nestLevel = 0; + + Cpp.directive.ifdef [0].ignoreAllBranches = FALSE; + Cpp.directive.ifdef [0].singleBranch = FALSE; + Cpp.directive.ifdef [0].branchChosen = FALSE; + Cpp.directive.ifdef [0].ignoring = FALSE; + + if (Cpp.directive.name == NULL) + Cpp.directive.name = vStringNew (); + else + vStringClear (Cpp.directive.name); +} + +extern void cppTerminate (void) +{ + if (Cpp.directive.name != NULL) + { + vStringDelete (Cpp.directive.name); + Cpp.directive.name = NULL; + } +} + +extern void cppBeginStatement (void) +{ + Cpp.resolveRequired = TRUE; +} + +extern void cppEndStatement (void) +{ + Cpp.resolveRequired = FALSE; +} + +/* +* Scanning functions +* +* This section handles preprocessor directives. It strips out all +* directives and may emit a tag for #define directives. +*/ + +/* This puts a character back into the input queue for the source File. + * Up to two characters may be ungotten. + */ +extern void cppUngetc (const int c) +{ + Assert (Cpp.ungetch2 == '\0'); + Cpp.ungetch2 = Cpp.ungetch; + Cpp.ungetch = c; +} + +/* Reads a directive, whose first character is given by "c", into "name". 
+ */ +static boolean readDirective (int c, char *const name, unsigned int maxLength) +{ + unsigned int i; + + for (i = 0 ; i < maxLength - 1 ; ++i) + { + if (i > 0) + { + c = fileGetc (); + if (c == EOF || ! isalpha (c)) + { + fileUngetc (c); + break; + } + } + name [i] = c; + } + name [i] = '\0'; /* null terminate */ + + return (boolean) isspacetab (c); +} + +/* Reads an identifier, whose first character is given by "c", into "tag", + * together with the file location and corresponding line number. + */ +static void readIdentifier (int c, vString *const name) +{ + vStringClear (name); + do + { + vStringPut (name, c); + } while (c = fileGetc (), (c != EOF && isident (c))); + fileUngetc (c); + vStringTerminate (name); +} + +static conditionalInfo *currentConditional (void) +{ + return &Cpp.directive.ifdef [Cpp.directive.nestLevel]; +} + +static boolean isIgnore (void) +{ + return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring; +} + +static boolean setIgnore (const boolean ignore) +{ + return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore; +} + +static boolean isIgnoreBranch (void) +{ + conditionalInfo *const ifdef = currentConditional (); + + /* Force a single branch if an incomplete statement is discovered + * en route. This may have allowed earlier branches containing complete + * statements to be followed, but we must follow no further branches. + */ + if (Cpp.resolveRequired && ! BraceFormat) + ifdef->singleBranch = TRUE; + + /* We will ignore this branch in the following cases: + * + * 1. We are ignoring all branches (conditional was within an ignored + * branch of the parent conditional) + * 2. A branch has already been chosen and either of: + * a. A statement was incomplete upon entering the conditional + * b. A statement is incomplete upon encountering a branch + */ + return (boolean) (ifdef->ignoreAllBranches || + (ifdef->branchChosen && ifdef->singleBranch)); +} + +static void chooseBranch (void) +{ + if (! 
BraceFormat) + { + conditionalInfo *const ifdef = currentConditional (); + + ifdef->branchChosen = (boolean) (ifdef->singleBranch || + Cpp.resolveRequired); + } +} + +/* Pushes one nesting level for an #if directive, indicating whether or not + * the branch should be ignored and whether a branch has already been chosen. + */ +static boolean pushConditional (const boolean firstBranchChosen) +{ + const boolean ignoreAllBranches = isIgnore (); /* current ignore */ + boolean ignoreBranch = FALSE; + + if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1) + { + conditionalInfo *ifdef; + + ++Cpp.directive.nestLevel; + ifdef = currentConditional (); + + /* We take a snapshot of whether there is an incomplete statement in + * progress upon encountering the preprocessor conditional. If so, + * then we will flag that only a single branch of the conditional + * should be followed. + */ + ifdef->ignoreAllBranches = ignoreAllBranches; + ifdef->singleBranch = Cpp.resolveRequired; + ifdef->branchChosen = firstBranchChosen; + ifdef->ignoring = (boolean) (ignoreAllBranches || ( + ! firstBranchChosen && ! BraceFormat && + (ifdef->singleBranch || !Option.if0))); + ignoreBranch = ifdef->ignoring; + } + return ignoreBranch; +} + +/* Pops one nesting level for an #endif directive. + */ +static boolean popConditional (void) +{ + if (Cpp.directive.nestLevel > 0) + --Cpp.directive.nestLevel; + + return isIgnore (); +} + +static void makeDefineTag (const char *const name) +{ + const boolean isFileScope = (boolean) (! isHeaderFile ()); + + if (includingDefineTags () && + (! isFileScope || Option.include.fileScope)) + { + tagEntryInfo e; + initTagEntry (&e, name); + e.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN); + e.isFileScope = isFileScope; + e.truncateLine = TRUE; + e.kindName = "macro"; + e.kind = 'd'; + makeTagEntry (&e); + } +} + +static void directiveDefine (const int c) +{ + if (isident1 (c)) + { + readIdentifier (c, Cpp.directive.name); + if (! 
isIgnore ()) + makeDefineTag (vStringValue (Cpp.directive.name)); + } + Cpp.directive.state = DRCTV_NONE; +} + +static void directivePragma (int c) +{ + if (isident1 (c)) + { + readIdentifier (c, Cpp.directive.name); + if (stringMatch (vStringValue (Cpp.directive.name), "weak")) + { + /* generate macro tag for weak name */ + do + { + c = fileGetc (); + } while (c == SPACE); + if (isident1 (c)) + { + readIdentifier (c, Cpp.directive.name); + makeDefineTag (vStringValue (Cpp.directive.name)); + } + } + } + Cpp.directive.state = DRCTV_NONE; +} + +static boolean directiveIf (const int c) +{ + DebugStatement ( const boolean ignore0 = isIgnore (); ) + const boolean ignore = pushConditional ((boolean) (c != '0')); + + Cpp.directive.state = DRCTV_NONE; + DebugStatement ( debugCppNest (TRUE, Cpp.directive.nestLevel); + if (ignore != ignore0) debugCppIgnore (ignore); ) + + return ignore; +} + +static boolean directiveHash (const int c) +{ + boolean ignore = FALSE; + char directive [MaxDirectiveName]; + DebugStatement ( const boolean ignore0 = isIgnore (); ) + + readDirective (c, directive, MaxDirectiveName); + if (stringMatch (directive, "define")) + Cpp.directive.state = DRCTV_DEFINE; + else if (stringMatch (directive, "undef")) + Cpp.directive.state = DRCTV_UNDEF; + else if (strncmp (directive, "if", (size_t) 2) == 0) + Cpp.directive.state = DRCTV_IF; + else if (stringMatch (directive, "elif") || + stringMatch (directive, "else")) + { + ignore = setIgnore (isIgnoreBranch ()); + if (! 
ignore && stringMatch (directive, "else")) + chooseBranch (); + Cpp.directive.state = DRCTV_NONE; + DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); ) + } + else if (stringMatch (directive, "endif")) + { + DebugStatement ( debugCppNest (FALSE, Cpp.directive.nestLevel); ) + ignore = popConditional (); + Cpp.directive.state = DRCTV_NONE; + DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); ) + } + else if (stringMatch (directive, "pragma")) + Cpp.directive.state = DRCTV_PRAGMA; + else + Cpp.directive.state = DRCTV_NONE; + + return ignore; +} + +/* Handles a pre-processor directive whose first character is given by "c". + */ +static boolean handleDirective (const int c) +{ + boolean ignore = isIgnore (); + + switch (Cpp.directive.state) + { + case DRCTV_NONE: ignore = isIgnore (); break; + case DRCTV_DEFINE: directiveDefine (c); break; + case DRCTV_HASH: ignore = directiveHash (c); break; + case DRCTV_IF: ignore = directiveIf (c); break; + case DRCTV_PRAGMA: directivePragma (c); break; + case DRCTV_UNDEF: directiveDefine (c); break; + } + return ignore; +} + +/* Called upon reading of a slash ('/') characters, determines whether a + * comment is encountered, and its type. + */ +static Comment isComment (void) +{ + Comment comment; + const int next = fileGetc (); + + if (next == '*') + comment = COMMENT_C; + else if (next == '/') + comment = COMMENT_CPLUS; + else + { + fileUngetc (next); + comment = COMMENT_NONE; + } + return comment; +} + +/* Skips over a C style comment. According to ANSI specification a comment + * is treated as white space, so we perform this substitution. + */ +int skipOverCComment (void) +{ + int c = fileGetc (); + + while (c != EOF) + { + if (c != '*') + c = fileGetc (); + else + { + const int next = fileGetc (); + + if (next != '/') + c = next; + else + { + c = SPACE; /* replace comment with space */ + break; + } + } + } + return c; +} + +/* Skips over a C++ style comment. 
+ */ +static int skipOverCplusComment (void) +{ + int c; + + while ((c = fileGetc ()) != EOF) + { + if (c == BACKSLASH) + fileGetc (); /* throw away next character, too */ + else if (c == NEWLINE) + break; + } + return c; +} + +/* Skips to the end of a string, returning a special character to + * symbolically represent a generic string. + */ +static int skipToEndOfString (boolean ignoreBackslash) +{ + int c; + + while ((c = fileGetc ()) != EOF) + { + if (c == BACKSLASH && ! ignoreBackslash) + fileGetc (); /* throw away next character, too */ + else if (c == DOUBLE_QUOTE) + break; + } + return STRING_SYMBOL; /* symbolic representation of string */ +} + +/* Skips to the end of the three (possibly four) 'c' sequence, returning a + * special character to symbolically represent a generic character. + * Also detects Vera numbers that include a base specifier (ie. 'b1010). + */ +static int skipToEndOfChar (void) +{ + int c; + int count = 0, veraBase = '\0'; + + while ((c = fileGetc ()) != EOF) + { + ++count; + if (c == BACKSLASH) + fileGetc (); /* throw away next character, too */ + else if (c == SINGLE_QUOTE) + break; + else if (c == NEWLINE) + { + fileUngetc (c); + break; + } + else if (count == 1 && strchr ("DHOB", toupper (c)) != NULL) + veraBase = c; + else if (veraBase != '\0' && ! isalnum (c)) + { + fileUngetc (c); + break; + } + } + return CHAR_SYMBOL; /* symbolic representation of character */ +} + +/* This function returns the next character, stripping out comments, + * C pre-processor directives, and the contents of single and double + * quoted strings. In short, strip anything which places a burden upon + * the tokenizer. 
+ */ +extern int cppGetc (void) +{ + boolean directive = FALSE; + boolean ignore = FALSE; + int c; + + if (Cpp.ungetch != '\0') + { + c = Cpp.ungetch; + Cpp.ungetch = Cpp.ungetch2; + Cpp.ungetch2 = '\0'; + return c; /* return here to avoid re-calling debugPutc () */ + } + else do + { + c = fileGetc (); +process: + switch (c) + { + case EOF: + ignore = FALSE; + directive = FALSE; + break; + + case TAB: + case SPACE: + break; /* ignore most white space */ + + case NEWLINE: + if (directive && ! ignore) + directive = FALSE; + Cpp.directive.accept = TRUE; + break; + + case DOUBLE_QUOTE: + Cpp.directive.accept = FALSE; + c = skipToEndOfString (FALSE); + break; + + case '#': + if (Cpp.directive.accept) + { + directive = TRUE; + Cpp.directive.state = DRCTV_HASH; + Cpp.directive.accept = FALSE; + } + break; + + case SINGLE_QUOTE: + Cpp.directive.accept = FALSE; + c = skipToEndOfChar (); + break; + + case '/': + { + const Comment comment = isComment (); + + if (comment == COMMENT_C) + c = skipOverCComment (); + else if (comment == COMMENT_CPLUS) + { + c = skipOverCplusComment (); + if (c == NEWLINE) + fileUngetc (c); + } + else + Cpp.directive.accept = FALSE; + break; + } + + case BACKSLASH: + { + int next = fileGetc (); + + if (next == NEWLINE) + continue; + else if (next == '?') + cppUngetc (next); + else + fileUngetc (next); + break; + } + + case '?': + { + int next = fileGetc (); + if (next != '?') + fileUngetc (next); + else + { + next = fileGetc (); + switch (next) + { + case '(': c = '['; break; + case ')': c = ']'; break; + case '<': c = '{'; break; + case '>': c = '}'; break; + case '/': c = BACKSLASH; goto process; + case '!': c = '|'; break; + case SINGLE_QUOTE: c = '^'; break; + case '-': c = '~'; break; + case '=': c = '#'; goto process; + default: + fileUngetc (next); + cppUngetc ('?'); + break; + } + } + } break; + + default: + if (c == '@' && Cpp.hasAtLiteralStrings) + { + int next = fileGetc (); + if (next == DOUBLE_QUOTE) + { + Cpp.directive.accept = FALSE; 
+ c = skipToEndOfString (TRUE); + break; + } + } + Cpp.directive.accept = FALSE; + if (directive) + ignore = handleDirective (c); + break; + } + } while (directive || ignore); + + DebugStatement ( debugPutc (DEBUG_CPP, c); ) + DebugStatement ( if (c == NEWLINE) + debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); ) + + return c; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/get.h b/get.h new file mode 100644 index 0000000..d523437 --- /dev/null +++ b/get.h @@ -0,0 +1,50 @@ +/* +* $Id: get.h 525 2007-05-28 01:50:41Z elliotth $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* External interface to get.c +*/ +#ifndef _GET_H +#define _GET_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "ctags.h" /* to define langType */ + +/* +* MACROS +*/ +/* Is the character valid as a character of a C identifier? + * VMS allows '$' in identifiers. + */ +#define isident(c) (isalnum(c) || (c) == '_' || (c) == '$') + +/* Is the character valid as the first character of a C identifier? + * C++ allows '~' in destructors. + * VMS allows '$' in identifiers. 
+ */ +#define isident1(c) (isalpha(c) || (c) == '_' || (c) == '~' || (c) == '$') + +/* +* FUNCTION PROTOTYPES +*/ +extern boolean isBraceFormat (void); +extern unsigned int getDirectiveNestLevel (void); +extern void cppInit (const boolean state, const boolean hasAtLiteralStrings); +extern void cppTerminate (void); +extern void cppBeginStatement (void); +extern void cppEndStatement (void); +extern void cppUngetc (const int c); +extern int cppGetc (void); +extern int skipOverCComment (void); + +#endif /* _GET_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/gnu_regex/.svn/all-wcprops b/gnu_regex/.svn/all-wcprops new file mode 100644 index 0000000..950b571 --- /dev/null +++ b/gnu_regex/.svn/all-wcprops @@ -0,0 +1,47 @@ +K 25 +svn:wc:ra_dav:version-url +V 43 +/svnroot/ctags/!svn/ver/710/trunk/gnu_regex +END +regex.h +K 25 +svn:wc:ra_dav:version-url +V 51 +/svnroot/ctags/!svn/ver/707/trunk/gnu_regex/regex.h +END +regexec.c +K 25 +svn:wc:ra_dav:version-url +V 53 +/svnroot/ctags/!svn/ver/710/trunk/gnu_regex/regexec.c +END +regex_internal.c +K 25 +svn:wc:ra_dav:version-url +V 60 +/svnroot/ctags/!svn/ver/710/trunk/gnu_regex/regex_internal.c +END +README.txt +K 25 +svn:wc:ra_dav:version-url +V 54 +/svnroot/ctags/!svn/ver/707/trunk/gnu_regex/README.txt +END +regex.c +K 25 +svn:wc:ra_dav:version-url +V 51 +/svnroot/ctags/!svn/ver/707/trunk/gnu_regex/regex.c +END +regex_internal.h +K 25 +svn:wc:ra_dav:version-url +V 60 +/svnroot/ctags/!svn/ver/710/trunk/gnu_regex/regex_internal.h +END +regcomp.c +K 25 +svn:wc:ra_dav:version-url +V 53 +/svnroot/ctags/!svn/ver/710/trunk/gnu_regex/regcomp.c +END diff --git a/gnu_regex/.svn/entries b/gnu_regex/.svn/entries new file mode 100644 index 0000000..8b5cf56 --- /dev/null +++ b/gnu_regex/.svn/entries @@ -0,0 +1,112 @@ +10 + +dir +720 +https://ctags.svn.sourceforge.net/svnroot/ctags/trunk/gnu_regex +https://ctags.svn.sourceforge.net/svnroot/ctags + + + +2009-07-04T05:53:16.648205Z +710 +dhiebert + + + + + + + + + + + + + + 
+c5d04d22-be80-434c-894e-aa346cc9e8e8 + +regex.h +file + + + + +2008-01-16T10:09:47.000000Z +2d49479cad13fa2a1c106bde452bbf5f +2009-07-04T05:26:42.065968Z +707 +dhiebert + +regexec.c +file + + + + +2009-07-04T05:01:35.000000Z +de02578fbe56d47c8d9ab3817b7766bc +2009-07-04T05:53:16.648205Z +710 +dhiebert + +regex_internal.c +file + + + + +2009-01-08T00:22:50.000000Z +73d91ed18d6fed41faa69ecd116338e0 +2009-07-04T05:53:16.648205Z +710 +dhiebert + +README.txt +file + + + + +2009-07-04T05:09:02.000000Z +248f348e18c15aabc6595f807e117388 +2009-07-04T05:26:42.065968Z +707 +dhiebert + +regex.c +file + + + + +2009-07-03T17:10:52.000000Z +170f8405dec70b235f2551325b43cd58 +2009-07-04T05:26:42.065968Z +707 +dhiebert + +regex_internal.h +file + + + + +2009-07-04T05:23:55.000000Z +9a4cbd70cb786603c081be91fb697f12 +2009-07-04T05:53:16.648205Z +710 +dhiebert + +regcomp.c +file + + + + +2009-07-04T05:08:27.000000Z +7fefcec74cd7fe150ffd275f589cb7db +2009-07-04T05:53:16.648205Z +710 +dhiebert + diff --git a/gnu_regex/.svn/text-base/README.txt.svn-base b/gnu_regex/.svn/text-base/README.txt.svn-base new file mode 100644 index 0000000..8fccbea --- /dev/null +++ b/gnu_regex/.svn/text-base/README.txt.svn-base @@ -0,0 +1,5 @@ +These source files were taken from the GNU glibc-2.10.1 package. + + ftp://ftp.gnu.org/gnu/glibc/glibc-2.10.1.tar.bz2 + +Minor changes were made to eliminate compiler errors and warnings. diff --git a/gnu_regex/.svn/text-base/regcomp.c.svn-base b/gnu_regex/.svn/text-base/regcomp.c.svn-base new file mode 100644 index 0000000..1f3daf2 --- /dev/null +++ b/gnu_regex/.svn/text-base/regcomp.c.svn-base @@ -0,0 +1,3818 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002,2003,2004,2005,2006,2007,2009 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, + size_t length, reg_syntax_t syntax); +static void re_compile_fastmap_iter (regex_t *bufp, + const re_dfastate_t *init_state, + char *fastmap); +static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); +#ifdef RE_ENABLE_I18N +static void free_charset (re_charset_t *cset); +#endif /* RE_ENABLE_I18N */ +static void free_workarea_compile (regex_t *preg); +static reg_errcode_t create_initial_state (re_dfa_t *dfa); +#ifdef RE_ENABLE_I18N +static void optimize_utf8 (re_dfa_t *dfa); +#endif +static reg_errcode_t analyze (regex_t *preg); +static reg_errcode_t preorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t postorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); +static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); +static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, + bin_tree_t *node); +static reg_errcode_t calc_first (void *extra, bin_tree_t *node); +static reg_errcode_t calc_next (void *extra, bin_tree_t *node); +static reg_errcode_t link_nfa_nodes (void 
*extra, bin_tree_t *node); +static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); +static int search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint); +static reg_errcode_t calc_eclosure (re_dfa_t *dfa); +static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, + int node, int root); +static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); +static int fetch_number (re_string_t *input, re_token_t *token, + reg_syntax_t syntax); +static int peek_token (re_token_t *token, re_string_t *input, + reg_syntax_t syntax) internal_function; +static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, + re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, + reg_errcode_t *err); +static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token, int token_len, + re_dfa_t *dfa, + reg_syntax_t syntax, + int accept_hyphen); +static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token); +#ifdef RE_ENABLE_I18N +static reg_errcode_t build_equiv_class (bitset_t sbcset, + re_charset_t 
*mbcset, + int *equiv_class_alloc, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + re_charset_t *mbcset, + int *char_class_alloc, + const unsigned char *class_name, + reg_syntax_t syntax); +#else /* not RE_ENABLE_I18N */ +static reg_errcode_t build_equiv_class (bitset_t sbcset, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + const unsigned char *class_name, + reg_syntax_t syntax); +#endif /* not RE_ENABLE_I18N */ +static bin_tree_t *build_charclass_op (re_dfa_t *dfa, + RE_TRANSLATE_TYPE trans, + const unsigned char *class_name, + const unsigned char *extra, + int non_match, reg_errcode_t *err); +static bin_tree_t *create_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + re_token_type_t type); +static bin_tree_t *create_token_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + const re_token_t *token); +static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); +static void free_token (re_token_t *node); +static reg_errcode_t free_tree (void *extra, bin_tree_t *node); +static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. + POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? 
*/ + +const char __re_error_msgid[] attribute_hidden = + { +#define REG_NOERROR_IDX 0 + gettext_noop ("Success") /* REG_NOERROR */ + "\0" +#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") + gettext_noop ("No match") /* REG_NOMATCH */ + "\0" +#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") + gettext_noop ("Invalid regular expression") /* REG_BADPAT */ + "\0" +#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") + gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ + "\0" +#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") + gettext_noop ("Invalid character class name") /* REG_ECTYPE */ + "\0" +#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") + gettext_noop ("Trailing backslash") /* REG_EESCAPE */ + "\0" +#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") + gettext_noop ("Invalid back reference") /* REG_ESUBREG */ + "\0" +#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") + gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ + "\0" +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") + gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ + "\0" +#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") + gettext_noop ("Unmatched \\{") /* REG_EBRACE */ + "\0" +#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") + gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ + "\0" +#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") + gettext_noop ("Invalid range end") /* REG_ERANGE */ + "\0" +#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") + gettext_noop ("Memory exhausted") /* REG_ESPACE */ + "\0" +#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") + gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ + "\0" +#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular 
expression") + gettext_noop ("Premature end of regular expression") /* REG_EEND */ + "\0" +#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") + gettext_noop ("Regular expression too big") /* REG_ESIZE */ + "\0" +#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") + gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ + }; + +const size_t __re_error_msgid_idx[] attribute_hidden = + { + REG_NOERROR_IDX, + REG_NOMATCH_IDX, + REG_BADPAT_IDX, + REG_ECOLLATE_IDX, + REG_ECTYPE_IDX, + REG_EESCAPE_IDX, + REG_ESUBREG_IDX, + REG_EBRACK_IDX, + REG_EPAREN_IDX, + REG_EBRACE_IDX, + REG_BADBR_IDX, + REG_ERANGE_IDX, + REG_ESPACE_IDX, + REG_BADRPT_IDX, + REG_EEND_IDX, + REG_ESIZE_IDX, + REG_ERPAREN_IDX + }; + +/* Entry points for GNU code. */ + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length LENGTH) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. */ + +const char * +re_compile_pattern (pattern, length, bufp) + const char *pattern; + size_t length; + struct re_pattern_buffer *bufp; +{ + reg_errcode_t ret; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub, unless RE_NO_SUB is set. */ + bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = re_compile_internal (bufp, pattern, length, re_syntax_options); + + if (!ret) + return NULL; + return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} +#ifdef _LIBC +weak_alias (__re_compile_pattern, re_compile_pattern) +#endif + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. 
Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. */ + +reg_syntax_t +re_set_syntax (syntax) + reg_syntax_t syntax; +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} +#ifdef _LIBC +weak_alias (__re_set_syntax, re_set_syntax) +#endif + +int +re_compile_fastmap (bufp) + struct re_pattern_buffer *bufp; +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + char *fastmap = bufp->fastmap; + + memset (fastmap, '\0', sizeof (char) * SBC_MAX); + re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); + if (dfa->init_state != dfa->init_state_word) + re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); + if (dfa->init_state != dfa->init_state_nl) + re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); + if (dfa->init_state != dfa->init_state_begbuf) + re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); + bufp->fastmap_accurate = 1; + return 0; +} +#ifdef _LIBC +weak_alias (__re_compile_fastmap, re_compile_fastmap) +#endif + +static inline void +__attribute ((always_inline)) +re_set_fastmap (char *fastmap, int icase, int ch) +{ + fastmap[ch] = 1; + if (icase) + fastmap[tolower (ch)] = 1; +} + +/* Helper function for re_compile_fastmap. + Compile fastmap for the initial_state INIT_STATE. 
*/ + +static void +re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, + char *fastmap) +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + int node_cnt; + int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); + for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) + { + int node = init_state->nodes.elems[node_cnt]; + re_token_type_t type = dfa->nodes[node].type; + + if (type == CHARACTER) + { + re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); +#ifdef RE_ENABLE_I18N + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + unsigned char *buf = alloca (dfa->mb_cur_max), *p; + wchar_t wc; + mbstate_t state; + + p = buf; + *p++ = dfa->nodes[node].opr.c; + while (++node < dfa->nodes_len + && dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].mb_partial) + *p++ = dfa->nodes[node].opr.c; + memset (&state, '\0', sizeof (state)); + if (__mbrtowc (&wc, (const char *) buf, p - buf, + &state) == p - buf + && (__wcrtomb ((char *) buf, towlower (wc), &state) + != (size_t) -1)) + re_set_fastmap (fastmap, 0, buf[0]); + } +#endif + } + else if (type == SIMPLE_BRACKET) + { + int i, ch; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + { + int j; + bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (w & ((bitset_word_t) 1 << j)) + re_set_fastmap (fastmap, icase, ch); + } + } +#ifdef RE_ENABLE_I18N + else if (type == COMPLEX_BRACKET) + { + re_charset_t *cset = dfa->nodes[node].opr.mbcset; + int i; + +# ifdef _LIBC + /* See if we have to try all bytes which start multiple collation + elements. + e.g. In da_DK, we want to catch 'a' since "aa" is a valid + collation element, and don't catch 'b' since 'b' is + the only collation element which starts from 'b' (and + it is caught by SIMPLE_BRACKET). 
*/ + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0 + && (cset->ncoll_syms || cset->nranges)) + { + const int32_t *table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); + } +# endif /* _LIBC */ + + /* See if we have to start the match at all multibyte characters, + i.e. where we would not find an invalid sequence. This only + applies to multibyte character sets; for single byte character + sets, the SIMPLE_BRACKET again suffices. */ + if (dfa->mb_cur_max > 1 + && (cset->nchar_classes || cset->non_match +# ifdef _LIBC + || cset->nequiv_classes +# endif /* _LIBC */ + )) + { + unsigned char c = 0; + do + { + mbstate_t mbs; + memset (&mbs, 0, sizeof (mbs)); + if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2) + re_set_fastmap (fastmap, false, (int) c); + } + while (++c != 0); + } + + else + { + /* ... Else catch all bytes which can start the mbchars. */ + for (i = 0; i < cset->nmbchars; ++i) + { + char buf[256]; + mbstate_t state; + memset (&state, '\0', sizeof (state)); + if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) + != (size_t) -1) + re_set_fastmap (fastmap, false, *(unsigned char *) buf); + } + } + } + } +#endif /* RE_ENABLE_I18N */ + else if (type == OP_PERIOD +#ifdef RE_ENABLE_I18N + || type == OP_UTF8_PERIOD +#endif /* RE_ENABLE_I18N */ + || type == END_OF_RE) + { + memset (fastmap, '\1', sizeof (char) * SBC_MAX); + if (type == END_OF_RE) + bufp->can_be_null = 1; + return; + } + } +} + +/* Entry point for POSIX code. */ +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. 
Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' to an allocated space for the fastmap; + `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. + Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (preg, pattern, cflags) + regex_t *__restrict preg; + const char *__restrict pattern; + int cflags; +{ + reg_errcode_t ret; + reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED + : RE_SYNTAX_POSIX_BASIC); + + preg->buffer = NULL; + preg->allocated = 0; + preg->used = 0; + + /* Try to allocate space for the fastmap. */ + preg->fastmap = re_malloc (char, SBC_MAX); + if (BE (preg->fastmap == NULL, 0)) + return REG_ESPACE; + + syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. 
*/ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + preg->no_sub = !!(cflags & REG_NOSUB); + preg->translate = NULL; + + ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) + ret = REG_EPAREN; + + /* We have already checked preg->fastmap != NULL. */ + if (BE (ret == REG_NOERROR, 1)) + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. This function never fails in this implementation. */ + (void) re_compile_fastmap (preg); + else + { + /* Some error occurred while compiling the expression. */ + re_free (preg->fastmap); + preg->fastmap = NULL; + } + + return (int) ret; +} +#ifdef _LIBC +weak_alias (__regcomp, regcomp) +#endif + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror ( + int errcode, + const regex_t *__restrict preg, + char *__restrict errbuf, + size_t errbuf_size) +{ + const char *msg; + size_t msg_size; + + if (BE (errcode < 0 + || errcode >= (int) (sizeof (__re_error_msgid_idx) + / sizeof (__re_error_msgid_idx[0])), 0)) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); + + msg_size = strlen (msg) + 1; /* Includes the null. 
*/ + + if (BE (errbuf_size != 0, 1)) + { + if (BE (msg_size > errbuf_size, 0)) + { +#if defined HAVE_MEMPCPY || defined _LIBC + *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; +#else + memcpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; +#endif + } + else + memcpy (errbuf, msg, msg_size); + } + + return msg_size; +} +#ifdef _LIBC +weak_alias (__regerror, regerror) +#endif + + +#ifdef RE_ENABLE_I18N +/* This static array is used for the map to single-byte characters when + UTF-8 is used. Otherwise we would allocate memory just to initialize + it the same all the time. UTF-8 is the preferred encoding so this is + a worthwhile optimization. */ +static const bitset_t utf8_sb_map = +{ + /* Set the first 128 bits. */ + [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX +}; +#endif + + +static void +free_dfa_content (re_dfa_t *dfa) +{ + int i, j; + + if (dfa->nodes) + for (i = 0; i < dfa->nodes_len; ++i) + free_token (dfa->nodes + i); + re_free (dfa->nexts); + for (i = 0; i < dfa->nodes_len; ++i) + { + if (dfa->eclosures != NULL) + re_node_set_free (dfa->eclosures + i); + if (dfa->inveclosures != NULL) + re_node_set_free (dfa->inveclosures + i); + if (dfa->edests != NULL) + re_node_set_free (dfa->edests + i); + } + re_free (dfa->edests); + re_free (dfa->eclosures); + re_free (dfa->inveclosures); + re_free (dfa->nodes); + + if (dfa->state_table) + for (i = 0; i <= dfa->state_hash_mask; ++i) + { + struct re_state_table_entry *entry = dfa->state_table + i; + for (j = 0; j < entry->num; ++j) + { + re_dfastate_t *state = entry->array[j]; + free_state (state); + } + re_free (entry->array); + } + re_free (dfa->state_table); +#ifdef RE_ENABLE_I18N + if (dfa->sb_char != utf8_sb_map) + re_free (dfa->sb_char); +#endif + re_free (dfa->subexp_map); +#ifdef DEBUG + re_free (dfa->re_str); +#endif + + re_free (dfa); +} + + +/* Free dynamically allocated space used by PREG. 
*/ + +void +regfree (preg) + regex_t *preg; +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + if (BE (dfa != NULL, 1)) + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + + re_free (preg->fastmap); + preg->fastmap = NULL; + + re_free (preg->translate); + preg->translate = NULL; +} +#ifdef _LIBC +weak_alias (__regfree, regfree) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +# ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec above without link errors. */ +weak_function +# endif +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + char *fastmap; + + if (!s) + { + if (!re_comp_buf.buffer) + return gettext ("No previous regular expression"); + return 0; + } + + if (re_comp_buf.buffer) + { + fastmap = re_comp_buf.fastmap; + re_comp_buf.fastmap = NULL; + __regfree (&re_comp_buf); + memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); + re_comp_buf.fastmap = fastmap; + } + + if (re_comp_buf.fastmap == NULL) + { + re_comp_buf.fastmap = (char *) malloc (SBC_MAX); + if (re_comp_buf.fastmap == NULL) + return (char *) gettext (__re_error_msgid + + __re_error_msgid_idx[(int) REG_ESPACE]); + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. 
*/ + return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} + +#ifdef _LIBC +libc_freeres_fn (free_mem) +{ + __regfree (&re_comp_buf); +} +#endif + +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. + Compile the regular expression PATTERN, whose length is LENGTH. + SYNTAX indicate regular expression's syntax. */ + +static reg_errcode_t +re_compile_internal (regex_t *preg, const char * pattern, size_t length, + reg_syntax_t syntax) +{ + reg_errcode_t err = REG_NOERROR; + re_dfa_t *dfa; + re_string_t regexp; + + /* Initialize the pattern buffer. */ + preg->fastmap_accurate = 0; + preg->syntax = syntax; + preg->not_bol = preg->not_eol = 0; + preg->used = 0; + preg->re_nsub = 0; + preg->can_be_null = 0; + preg->regs_allocated = REGS_UNALLOCATED; + + /* Initialize the dfa. */ + dfa = (re_dfa_t *) preg->buffer; + if (BE (preg->allocated < sizeof (re_dfa_t), 0)) + { + /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. If ->buffer is NULL this + is a simple allocation. */ + dfa = re_realloc (preg->buffer, re_dfa_t, 1); + if (dfa == NULL) + return REG_ESPACE; + preg->allocated = sizeof (re_dfa_t); + preg->buffer = (unsigned char *) dfa; + } + preg->used = sizeof (re_dfa_t); + + err = init_dfa (dfa, length); + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } +#ifdef DEBUG + /* Note: length+1 will not overflow since it is checked in init_dfa. 
*/ + dfa->re_str = re_malloc (char, length + 1); + strncpy (dfa->re_str, pattern, length + 1); +#endif + + __libc_lock_init (dfa->lock); + + err = re_string_construct (®exp, pattern, length, preg->translate, + syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + { + re_compile_internal_free_return: + free_workarea_compile (preg); + re_string_destruct (®exp); + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } + + /* Parse the regular expression, and build a structure tree. */ + preg->re_nsub = 0; + dfa->str_tree = parse (®exp, preg, syntax, &err); + if (BE (dfa->str_tree == NULL, 0)) + goto re_compile_internal_free_return; + + /* Analyze the tree and create the nfa. */ + err = analyze (preg); + if (BE (err != REG_NOERROR, 0)) + goto re_compile_internal_free_return; + +#ifdef RE_ENABLE_I18N + /* If possible, do searching in single byte encoding to speed things up. */ + if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) + optimize_utf8 (dfa); +#endif + + /* Then create the initial state of the dfa. */ + err = create_initial_state (dfa); + + /* Release work areas. */ + free_workarea_compile (preg); + re_string_destruct (®exp); + + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + } + + return err; +} + +/* Initialize DFA. We use the length of the regular expression PAT_LEN + as the initial length of some arrays. */ + +static reg_errcode_t +init_dfa (re_dfa_t *dfa, size_t pat_len) +{ + unsigned int table_size; +#ifndef _LIBC + char *codeset_name; +#endif + + memset (dfa, '\0', sizeof (re_dfa_t)); + + /* Force allocation of str_tree_storage the first time. */ + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + + /* Avoid overflows. 
*/ + if (pat_len == SIZE_MAX) + return REG_ESPACE; + + dfa->nodes_alloc = pat_len + 1; + dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); + + /* table_size = 2 ^ ceil(log pat_len) */ + for (table_size = 1; ; table_size <<= 1) + if (table_size > pat_len) + break; + + dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); + dfa->state_hash_mask = table_size - 1; + + dfa->mb_cur_max = MB_CUR_MAX; +#ifdef _LIBC + if (dfa->mb_cur_max == 6 + && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) + dfa->is_utf8 = 1; + dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) + != 0); +#else +# ifdef HAVE_LANGINFO_CODESET + codeset_name = nl_langinfo (CODESET); +# else + codeset_name = getenv ("LC_ALL"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LC_CTYPE"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LANG"); + if (codeset_name == NULL) + codeset_name = ""; + else if (strchr (codeset_name, '.') != NULL) + codeset_name = strchr (codeset_name, '.') + 1; +# endif + + if (strcasecmp (codeset_name, "UTF-8") == 0 + || strcasecmp (codeset_name, "UTF8") == 0) + dfa->is_utf8 = 1; + + /* We check exhaustively in the loop below if this charset is a + superset of ASCII. */ + dfa->map_notascii = 0; +#endif + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + if (dfa->is_utf8) + dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; + else + { + int i, j, ch; + + dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); + if (BE (dfa->sb_char == NULL, 0)) + return REG_ESPACE; + + /* Set the bits corresponding to single byte chars. 
*/ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + { + wint_t wch = __btowc (ch); + if (wch != WEOF) + dfa->sb_char[i] |= (bitset_word_t) 1 << j; +# ifndef _LIBC + if (isascii (ch) && wch != ch) + dfa->map_notascii = 1; +# endif + } + } + } +#endif + + if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +/* Initialize WORD_CHAR table, which indicate which character is + "word". In this case "word" means that it is the word construction + character used by some operators like "\<", "\>", etc. */ + +static void +internal_function +init_word_char (re_dfa_t *dfa) +{ + int i, j, ch; + dfa->word_ops_used = 1; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (isalnum (ch) || ch == '_') + dfa->word_char[i] |= (bitset_word_t) 1 << j; +} + +/* Free the work area which are only used while compiling. */ + +static void +free_workarea_compile (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_storage_t *storage, *next; + for (storage = dfa->str_tree_storage; storage; storage = next) + { + next = storage->next; + re_free (storage); + } + dfa->str_tree_storage = NULL; + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + dfa->str_tree = NULL; + re_free (dfa->org_indices); + dfa->org_indices = NULL; +} + +/* Create initial states for all contexts. */ + +static reg_errcode_t +create_initial_state (re_dfa_t *dfa) +{ + int first, i; + reg_errcode_t err; + re_node_set init_nodes; + + /* Initial states have the epsilon closure of the node which is + the first node of the regular expression. */ + first = dfa->str_tree->first->node_idx; + dfa->init_node = first; + err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* The back-references which are in initial states can epsilon transit, + since in this case all of the subexpressions can be null. 
+ Then we add epsilon closures of the nodes which are the next nodes of + the back-references. */ + if (dfa->nbackref > 0) + for (i = 0; i < init_nodes.nelem; ++i) + { + int node_idx = init_nodes.elems[i]; + re_token_type_t type = dfa->nodes[node_idx].type; + + int clexp_idx; + if (type != OP_BACK_REF) + continue; + for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) + break; + } + if (clexp_idx == init_nodes.nelem) + continue; + + if (type == OP_BACK_REF) + { + int dest_idx = dfa->edests[node_idx].elems[0]; + if (!re_node_set_contains (&init_nodes, dest_idx)) + { + re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); + i = 0; + } + } + } + + /* It must be the first time to invoke acquire_state. */ + dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); + /* We don't check ERR here, since the initial state must not be NULL. */ + if (BE (dfa->init_state == NULL, 0)) + return err; + if (dfa->init_state->has_constraint) + { + dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_WORD); + dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_NEWLINE); + dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, + &init_nodes, + CONTEXT_NEWLINE + | CONTEXT_BEGBUF); + if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return err; + } + else + dfa->init_state_word = dfa->init_state_nl + = dfa->init_state_begbuf = dfa->init_state; + + re_node_set_free (&init_nodes); + return REG_NOERROR; +} + +#ifdef RE_ENABLE_I18N +/* If it is possible to do searching in single byte encoding instead of UTF-8 + to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change + DFA nodes where needed. 
*/
+
+static void
+optimize_utf8 (re_dfa_t *dfa)
+{
+  int node, i, mb_chars = 0, has_period = 0;
+
+  /* First pass: inspect every node.  Return without touching the DFA as
+     soon as a node is found that rules the optimization out; otherwise
+     just record whether non-ASCII characters or periods occur.  */
+  for (node = 0; node < dfa->nodes_len; ++node)
+    switch (dfa->nodes[node].type)
+      {
+      case CHARACTER:
+        if (dfa->nodes[node].opr.c >= 0x80)
+          mb_chars = 1;
+        break;
+      case ANCHOR:
+        switch (dfa->nodes[node].opr.ctx_type)
+          {
+          case LINE_FIRST:
+          case LINE_LAST:
+          case BUF_FIRST:
+          case BUF_LAST:
+            break;
+          default:
+            /* Word anchors etc. cannot be handled.  It's okay to test
+               opr.ctx_type since constraints (for all DFA nodes) are
+               created by ORing one or more opr.ctx_type values.  */
+            return;
+          }
+        break;
+      case OP_PERIOD:
+        has_period = 1;
+        break;
+      case OP_BACK_REF:
+      case OP_ALT:
+      case END_OF_RE:
+      case OP_DUP_ASTERISK:
+      case OP_OPEN_SUBEXP:
+      case OP_CLOSE_SUBEXP:
+        break;
+      case COMPLEX_BRACKET:
+        return;
+      case SIMPLE_BRACKET:
+        /* Just double check.  The non-ASCII range starts at 0x80.  */
+        assert (0x80 % BITSET_WORD_BITS == 0);
+        /* Give up if the bracket contains any byte >= 0x80.  */
+        for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
+          if (dfa->nodes[node].opr.sbcset[i])
+            return;
+        break;
+      default:
+        abort ();
+      }
+
+  /* Second pass, only needed when something has to be rewritten: clear
+     mb_partial on non-ASCII CHARACTER nodes and turn each OP_PERIOD into
+     OP_UTF8_PERIOD.  */
+  if (mb_chars || has_period)
+    for (node = 0; node < dfa->nodes_len; ++node)
+      {
+        if (dfa->nodes[node].type == CHARACTER
+            && dfa->nodes[node].opr.c >= 0x80)
+          dfa->nodes[node].mb_partial = 0;
+        else if (dfa->nodes[node].type == OP_PERIOD)
+          dfa->nodes[node].type = OP_UTF8_PERIOD;
+      }
+
+  /* The search can be in single byte locale.  */
+  dfa->mb_cur_max = 1;
+  dfa->is_utf8 = 0;
+  dfa->has_mb_node = dfa->nbackref > 0 || has_period;
+}
+#endif
+
+/* Analyze the structure tree, and calculate "first", "next", "edest",
+   "eclosure", and "inveclosure".
+   Returns REG_NOERROR on success, REG_ESPACE if an array allocation
+   fails, or the error reported by one of the passes.  */
+
+static reg_errcode_t
+analyze (regex_t *preg)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  reg_errcode_t ret;
+
+  /* Allocate arrays.  */
+  dfa->nexts = re_malloc (int, dfa->nodes_alloc);
+  dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
+  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
+          || dfa->eclosures == NULL, 0))
+    return REG_ESPACE;
+
+  /* The subexpression map starts out as the identity.  It is discarded
+     again if optimize_subexps leaves every entry unchanged.  A failed
+     allocation is not an error: the optimization is simply skipped.  */
+  dfa->subexp_map = re_malloc (int, preg->re_nsub);
+  if (dfa->subexp_map != NULL)
+    {
+      int i;
+      for (i = 0; i < preg->re_nsub; i++)
+        dfa->subexp_map[i] = i;
+      preorder (dfa->str_tree, optimize_subexps, dfa);
+      for (i = 0; i < preg->re_nsub; i++)
+        if (dfa->subexp_map[i] != i)
+          break;
+      if (i == preg->re_nsub)
+        {
+          free (dfa->subexp_map);
+          dfa->subexp_map = NULL;
+        }
+    }
+
+  /* Run the tree passes in order: lower SUBEXP nodes, compute FIRST and
+     create the automaton nodes, compute NEXT, link the nodes, then
+     compute the epsilon closures.  */
+  ret = postorder (dfa->str_tree, lower_subexps, preg);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+  ret = postorder (dfa->str_tree, calc_first, dfa);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+  preorder (dfa->str_tree, calc_next, dfa);
+  ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+  ret = calc_eclosure (dfa);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+
+  /* We only need this during the prune_impossible_nodes pass in regexec.c;
+     skip it if p_i_n will not run, as calc_inveclosure can be quadratic.  */
+  if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
+      || dfa->nbackref)
+    {
+      dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
+      if (BE (dfa->inveclosures == NULL, 0))
+        return REG_ESPACE;
+      ret = calc_inveclosure (dfa);
+    }
+
+  return ret;
+}
+
+/* Our parse trees are very unbalanced, so we cannot use a stack to
+   implement parse tree visits.  Instead, we use parent pointers and
+   some hairy code in these two functions.
*/
+/* Call FN (EXTRA, node) on every node of the tree rooted at ROOT, children
+   before their parent.  Stops at and returns the first error reported by
+   FN, otherwise returns REG_NOERROR once the parentless node has been
+   visited.  */
+static reg_errcode_t
+postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+           void *extra)
+{
+  bin_tree_t *node, *prev;
+
+  for (node = root; ; )
+    {
+      /* Descend down the tree, preferably to the left (or to the right
+         if that's the only child).  */
+      while (node->left || node->right)
+        if (node->left)
+          node = node->left;
+        else
+          node = node->right;
+
+      do
+        {
+          reg_errcode_t err = fn (extra, node);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+          if (node->parent == NULL)
+            return REG_NOERROR;
+          prev = node;
+          node = node->parent;
+        }
+      /* Go up while we have a node that is reached from the right.  */
+      while (node->right == prev || node->right == NULL);
+      /* We came up from a left child whose parent also has a right
+         subtree: visit that subtree next.  */
+      node = node->right;
+    }
+}
+
+/* Call FN (EXTRA, node) on every node of the tree rooted at ROOT, each
+   node before its children.  Stops at and returns the first error
+   reported by FN, otherwise returns REG_NOERROR after climbing past the
+   parentless node.  */
+static reg_errcode_t
+preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+          void *extra)
+{
+  bin_tree_t *node;
+
+  for (node = root; ; )
+    {
+      reg_errcode_t err = fn (extra, node);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+
+      /* Go to the left node, or up and to the right.  */
+      if (node->left)
+        node = node->left;
+      else
+        {
+          /* PREV is the child we just climbed out of (NULL at first).
+             Keep climbing while the current node has no right subtree
+             or we have just returned from it.  */
+          bin_tree_t *prev = NULL;
+          while (node->right == prev || node->right == NULL)
+            {
+              prev = node;
+              node = node->parent;
+              if (!node)
+                return REG_NOERROR;
+            }
+          node = node->right;
+        }
+    }
+}
+
+/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
+   re_search_internal to map the inner one's opr.idx to this one's.  Adjust
+   backreferences as well.  Requires a preorder visit.
*/
+static reg_errcode_t
+optimize_subexps (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+
+  if (node->token.type == OP_BACK_REF && dfa->subexp_map)
+    {
+      /* Redirect the back reference through the map and record that the
+         resulting group is referenced.  */
+      int idx = node->token.opr.idx;
+      node->token.opr.idx = dfa->subexp_map[idx];
+      dfa->used_bkref_map |= 1 << node->token.opr.idx;
+    }
+
+  else if (node->token.type == SUBEXP
+           && node->left && node->left->token.type == SUBEXP)
+    {
+      /* A SUBEXP whose only content is another SUBEXP: splice the inner
+         node out of the tree and map its index to the outer one's.  */
+      int other_idx = node->left->token.opr.idx;
+
+      node->left = node->left->left;
+      if (node->left)
+        node->left->parent = node;
+
+      dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
+      if (other_idx < BITSET_WORD_BITS)
+        dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
+    }
+
+  return REG_NOERROR;
+}
+
+/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
+   of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP.
+   Only the children of NODE are rewritten here; the returned value is the
+   error code set by lower_subexp, if any.  */
+static reg_errcode_t
+lower_subexps (void *extra, bin_tree_t *node)
+{
+  regex_t *preg = (regex_t *) extra;
+  reg_errcode_t err = REG_NOERROR;
+
+  if (node->left && node->left->token.type == SUBEXP)
+    {
+      node->left = lower_subexp (&err, preg, node->left);
+      if (node->left)
+        node->left->parent = node;
+    }
+  if (node->right && node->right->token.type == SUBEXP)
+    {
+      node->right = lower_subexp (&err, preg, node->right);
+      if (node->right)
+        node->right->parent = node;
+    }
+
+  return err;
+}
+
+/* Return the replacement tree for the SUBEXP node NODE: either its body
+   alone (when no_sub is set, the body is non-empty and the group is not
+   marked in used_bkref_map) or OPEN . body . CLOSE.  On allocation
+   failure sets *ERR to REG_ESPACE and returns NULL.  */
+static bin_tree_t *
+lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *body = node->left;
+  bin_tree_t *op, *cls, *tree1, *tree;
+
+  if (preg->no_sub
+      /* We do not optimize empty subexpressions, because otherwise we may
+         have bad CONCAT nodes with NULL children.  This is obviously not
+         very common, so we do not lose much.  An example that triggers
+         this case is the sed "script" /\(\)/x.  */
+      && node->left != NULL
+      && (node->token.opr.idx >= BITSET_WORD_BITS
+          || !(dfa->used_bkref_map
+               & ((bitset_word_t) 1 << node->token.opr.idx))))
+    return node->left;
+
+  /* Convert the SUBEXP node to the concatenation of an
+     OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP.  */
+  op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
+  cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
+  tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
+  tree = create_tree (dfa, op, tree1, CONCAT);
+  if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  /* Both delimiters carry the group's index and opt_subexp flag.  */
+  op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
+  op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
+  return tree;
+}
+
+/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
+   nodes.  Requires a postorder visit.  */
+static reg_errcode_t
+calc_first (void *extra, bin_tree_t *node)
+{
+  re_dfa_t *dfa = (re_dfa_t *) extra;
+  if (node->token.type == CONCAT)
+    {
+      /* A concatenation adds no automaton node of its own; it borrows
+         FIRST and the node index from its left operand.  */
+      node->first = node->left->first;
+      node->node_idx = node->left->node_idx;
+    }
+  else
+    {
+      node->first = node;
+      node->node_idx = re_dfa_add_node (dfa, node->token);
+      if (BE (node->node_idx == -1, 0))
+        return REG_ESPACE;
+      if (node->token.type == ANCHOR)
+        dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
+    }
+  return REG_NOERROR;
+}
+
+/* Pass 2: compute NEXT on the tree.  Preorder visit.  */
+static reg_errcode_t
+calc_next (void *extra, bin_tree_t *node)
+{
+  switch (node->token.type)
+    {
+    case OP_DUP_ASTERISK:
+      /* The operand of a repetition loops back to the repetition.  */
+      node->left->next = node;
+      break;
+    case CONCAT:
+      node->left->next = node->right->first;
+      node->right->next = node->next;
+      break;
+    default:
+      /* Children of any other node inherit the node's own NEXT.  */
+      if (node->left)
+        node->left->next = node->next;
+      if (node->right)
+        node->right->next = node->next;
+      break;
+    }
+  return REG_NOERROR;
+}
+
+/* Pass 3: link all DFA nodes to their NEXT node (any order will do).
*/ +static reg_errcode_t +link_nfa_nodes (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + int idx = node->node_idx; + reg_errcode_t err = REG_NOERROR; + + switch (node->token.type) + { + case CONCAT: + break; + + case END_OF_RE: + assert (node->next == NULL); + break; + + case OP_DUP_ASTERISK: + case OP_ALT: + { + int left, right; + dfa->has_plural_match = 1; + if (node->left != NULL) + left = node->left->first->node_idx; + else + left = node->next->node_idx; + if (node->right != NULL) + right = node->right->first->node_idx; + else + right = node->next->node_idx; + assert (left > -1); + assert (right > -1); + err = re_node_set_init_2 (dfa->edests + idx, left, right); + } + break; + + case ANCHOR: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); + break; + + case OP_BACK_REF: + dfa->nexts[idx] = node->next->node_idx; + if (node->token.type == OP_BACK_REF) + re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); + break; + + default: + assert (!IS_EPSILON_NODE (node->token.type)); + dfa->nexts[idx] = node->next->node_idx; + break; + } + + return err; +} + +/* Duplicate the epsilon closure of the node ROOT_NODE. + Note that duplicated nodes have constraint INIT_CONSTRAINT in addition + to their own constraint. */ + +static reg_errcode_t +internal_function +duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, + int root_node, unsigned int init_constraint) +{ + int org_node, clone_node, ret; + unsigned int constraint = init_constraint; + for (org_node = top_org_node, clone_node = top_clone_node;;) + { + int org_dest, clone_dest; + if (dfa->nodes[org_node].type == OP_BACK_REF) + { + /* If the back reference epsilon-transit, its destination must + also have the constraint. Then duplicate the epsilon closure + of the destination of the back reference, and store it in + edests of the back reference. 
*/ + org_dest = dfa->nexts[org_node]; + re_node_set_empty (dfa->edests + clone_node); + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + dfa->nexts[clone_node] = dfa->nexts[org_node]; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else if (dfa->edests[org_node].nelem == 0) + { + /* In case of the node can't epsilon-transit, don't duplicate the + destination and store the original destination as the + destination of the node. */ + dfa->nexts[clone_node] = dfa->nexts[org_node]; + break; + } + else if (dfa->edests[org_node].nelem == 1) + { + /* In case of the node can epsilon-transit, and it has only one + destination. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* If the node is root_node itself, it means the epsilon clsoure + has a loop. Then tie it to the destination of the root_node. */ + if (org_node == root_node && clone_node != org_node) + { + ret = re_node_set_insert (dfa->edests + clone_node, org_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + break; + } + /* In case of the node has another constraint, add it. */ + constraint |= dfa->nodes[org_node].constraint; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else /* dfa->edests[org_node].nelem == 2 */ + { + /* In case of the node can epsilon-transit, and it has two + destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* Search for a duplicated node which satisfies the constraint. */ + clone_dest = search_duplicated_node (dfa, org_dest, constraint); + if (clone_dest == -1) + { + /* There is no such duplicated node, create a new one. 
*/ + reg_errcode_t err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + err = duplicate_node_closure (dfa, org_dest, clone_dest, + root_node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + { + /* There is a duplicated node which satisfies the constraint, + use it to avoid infinite loop. */ + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + + org_dest = dfa->edests[org_node].elems[1]; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + org_node = org_dest; + clone_node = clone_dest; + } + return REG_NOERROR; +} + +/* Search for a node which is duplicated from the node ORG_NODE, and + satisfies the constraint CONSTRAINT. */ + +static int +search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint) +{ + int idx; + for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) + { + if (org_node == dfa->org_indices[idx] + && constraint == dfa->nodes[idx].constraint) + return idx; /* Found. */ + } + return -1; /* Not found. */ +} + +/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. + Return the index of the new node, or -1 if insufficient storage is + available. */ + +static int +duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint) +{ + int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); + if (BE (dup_idx != -1, 1)) + { + dfa->nodes[dup_idx].constraint = constraint; + dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; + dfa->nodes[dup_idx].duplicated = 1; + + /* Store the index of the original node. 
*/ + dfa->org_indices[dup_idx] = org_idx; + } + return dup_idx; +} + +static reg_errcode_t +calc_inveclosure (re_dfa_t *dfa) +{ + int src, idx, ret; + for (idx = 0; idx < dfa->nodes_len; ++idx) + re_node_set_init_empty (dfa->inveclosures + idx); + + for (src = 0; src < dfa->nodes_len; ++src) + { + int *elems = dfa->eclosures[src].elems; + for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) + { + ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + } + + return REG_NOERROR; +} + +/* Calculate "eclosure" for all the node in DFA. */ + +static reg_errcode_t +calc_eclosure (re_dfa_t *dfa) +{ + int node_idx, incomplete; +#ifdef DEBUG + assert (dfa->nodes_len > 0); +#endif + incomplete = 0; + /* For each nodes, calculate epsilon closure. */ + for (node_idx = 0; ; ++node_idx) + { + reg_errcode_t err; + re_node_set eclosure_elem; + if (node_idx == dfa->nodes_len) + { + if (!incomplete) + break; + incomplete = 0; + node_idx = 0; + } + +#ifdef DEBUG + assert (dfa->eclosures[node_idx].nelem != -1); +#endif + + /* If we have already calculated, skip it. */ + if (dfa->eclosures[node_idx].nelem != 0) + continue; + /* Calculate epsilon closure of `node_idx'. */ + err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (dfa->eclosures[node_idx].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + return REG_NOERROR; +} + +/* Calculate epsilon closure of NODE. */ + +static reg_errcode_t +calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) +{ + reg_errcode_t err; + int i, incomplete; + re_node_set eclosure; + incomplete = 0; + err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* This indicates that we are calculating this node now. + We reference this value to avoid infinite loop. 
*/ + dfa->eclosures[node].nelem = -1; + + /* If the current node has constraints, duplicate all nodes + since they must inherit the constraints. */ + if (dfa->nodes[node].constraint + && dfa->edests[node].nelem + && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) + { + err = duplicate_node_closure (dfa, node, node, node, + dfa->nodes[node].constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Expand each epsilon destination nodes. */ + if (IS_EPSILON_NODE(dfa->nodes[node].type)) + for (i = 0; i < dfa->edests[node].nelem; ++i) + { + re_node_set eclosure_elem; + int edest = dfa->edests[node].elems[i]; + /* If calculating the epsilon closure of `edest' is in progress, + return intermediate result. */ + if (dfa->eclosures[edest].nelem == -1) + { + incomplete = 1; + continue; + } + /* If we haven't calculated the epsilon closure of `edest' yet, + calculate now. Otherwise use calculated epsilon closure. */ + if (dfa->eclosures[edest].nelem == 0) + { + err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + eclosure_elem = dfa->eclosures[edest]; + /* Merge the epsilon closure of `edest'. */ + re_node_set_merge (&eclosure, &eclosure_elem); + /* If the epsilon closure of `edest' is incomplete, + the epsilon closure of this node is also incomplete. */ + if (dfa->eclosures[edest].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + + /* Epsilon closures include itself. */ + re_node_set_insert (&eclosure, node); + if (incomplete && !root) + dfa->eclosures[node].nelem = 0; + else + dfa->eclosures[node] = eclosure; + *new_set = eclosure; + return REG_NOERROR; +} + +/* Functions for token which are used in the parser. */ + +/* Fetch a token from INPUT. + We must not use this function inside bracket expressions. 
*/ + +static void +internal_function +fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) +{ + re_string_skip_bytes (input, peek_token (result, input, syntax)); +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function inside bracket expressions. */ + +static int +internal_function +peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + + c = re_string_peek_byte (input, 0); + token->opr.c = c; + + token->word_char = 0; +#ifdef RE_ENABLE_I18N + token->mb_partial = 0; + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + token->mb_partial = 1; + return 1; + } +#endif + if (c == '\\') + { + unsigned char c2; + if (re_string_cur_idx (input) + 1 >= re_string_length (input)) + { + token->type = BACK_SLASH; + return 1; + } + + c2 = re_string_peek_byte_case (input, 1); + token->opr.c = c2; + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, + re_string_cur_idx (input) + 1); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (c2) != 0; + + switch (c2) + { + case '|': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (!(syntax & RE_NO_BK_REFS)) + { + token->type = OP_BACK_REF; + token->opr.idx = c2 - '1'; + } + break; + case '<': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_FIRST; + } + break; + case '>': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_LAST; + } + break; + case 'b': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = 
WORD_DELIM; + } + break; + case 'B': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = NOT_WORD_DELIM; + } + break; + case 'w': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_WORD; + break; + case 'W': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTWORD; + break; + case 's': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_SPACE; + break; + case 'S': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTSPACE; + break; + case '`': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_FIRST; + } + break; + case '\'': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_LAST; + } + break; + case '(': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_CLOSE_SUBEXP; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_CLOSE_DUP_NUM; + break; + default: + break; + } + return 2; + } + + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (token->opr.c); + + switch (c) + { + case '\n': + if (syntax & RE_NEWLINE_ALT) + token->type = OP_ALT; + break; + case '|': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '*': + token->type = OP_DUP_ASTERISK; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & 
RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_CLOSE_DUP_NUM; + break; + case '(': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_CLOSE_SUBEXP; + break; + case '[': + token->type = OP_OPEN_BRACKET; + break; + case '.': + token->type = OP_PERIOD; + break; + case '^': + if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && + re_string_cur_idx (input) != 0) + { + char prev = re_string_peek_byte (input, -1); + if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_FIRST; + break; + case '$': + if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && + re_string_cur_idx (input) + 1 != re_string_length (input)) + { + re_token_t next; + re_string_skip_bytes (input, 1); + peek_token (&next, input, syntax); + re_string_skip_bytes (input, -1); + if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_LAST; + break; + default: + break; + } + return 1; +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function out of bracket expressions. 
*/ + +static int +internal_function +peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + c = re_string_peek_byte (input, 0); + token->opr.c = c; + +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + return 1; + } +#endif /* RE_ENABLE_I18N */ + + if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) + && re_string_cur_idx (input) + 1 < re_string_length (input)) + { + /* In this case, '\' escape a character. */ + unsigned char c2; + re_string_skip_bytes (input, 1); + c2 = re_string_peek_byte (input, 0); + token->opr.c = c2; + token->type = CHARACTER; + return 1; + } + if (c == '[') /* '[' is a special char in a bracket exps. */ + { + unsigned char c2; + int token_len; + if (re_string_cur_idx (input) + 1 < re_string_length (input)) + c2 = re_string_peek_byte (input, 1); + else + c2 = 0; + token->opr.c = c2; + token_len = 2; + switch (c2) + { + case '.': + token->type = OP_OPEN_COLL_ELEM; + break; + case '=': + token->type = OP_OPEN_EQUIV_CLASS; + break; + case ':': + if (syntax & RE_CHAR_CLASSES) + { + token->type = OP_OPEN_CHAR_CLASS; + break; + } + /* else fall through. */ + default: + token->type = CHARACTER; + token->opr.c = c; + token_len = 1; + break; + } + return token_len; + } + switch (c) + { + case '-': + token->type = OP_CHARSET_RANGE; + break; + case ']': + token->type = OP_CLOSE_BRACKET; + break; + case '^': + token->type = OP_NON_MATCH_LIST; + break; + default: + token->type = CHARACTER; + } + return 1; +} + +/* Functions for parser. */ + +/* Entry point of the parser. + Parse the regular expression REGEXP and return the structure tree. + If an error is occured, ERR is set by error code, and return NULL. + This function build the following tree, from regular expression : + CAT + / \ + / \ + EOR + + CAT means concatenation. 
+ EOR means end of regular expression. */ + +static bin_tree_t * +parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, + reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *eor, *root; + re_token_t current_token; + dfa->syntax = syntax; + fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); + tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + eor = create_tree (dfa, NULL, NULL, END_OF_RE); + if (tree != NULL) + root = create_tree (dfa, tree, eor, CONCAT); + else + root = eor; + if (BE (eor == NULL || root == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + return root; +} + +/* This function build the following tree, from regular expression + |: + ALT + / \ + / \ + + + ALT means alternative, which represents the operator `|'. */ + +static bin_tree_t * +parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *branch = NULL; + tree = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type == OP_ALT) + { + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + if (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + branch = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && branch == NULL, 0)) + return NULL; + } + else + branch = NULL; + tree = create_tree (dfa, tree, branch, OP_ALT); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + return tree; +} + +/* This function build the following tree, from regular expression + : + CAT + / \ + / \ + + + CAT means concatenation. 
*/ + +static bin_tree_t * +parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + bin_tree_t *tree, *exp; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + tree = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + exp = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && exp == NULL, 0)) + { + return NULL; + } + if (tree != NULL && exp != NULL) + { + tree = create_tree (dfa, tree, exp, CONCAT); + if (tree == NULL) + { + *err = REG_ESPACE; + return NULL; + } + } + else if (tree == NULL) + tree = exp; + /* Otherwise exp == NULL, we don't need to create new tree. */ + } + return tree; +} + +/* This function build the following tree, from regular expression a*: + * + | + a +*/ + +static bin_tree_t * +parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + switch (token->type) + { + case CHARACTER: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (!re_string_eoi (regexp) + && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) + { + bin_tree_t *mbc_remain; + fetch_token (token, regexp, syntax); + mbc_remain = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree, mbc_remain, CONCAT); + if (BE (mbc_remain == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + } +#endif + break; + case OP_OPEN_SUBEXP: + tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case 
OP_OPEN_BRACKET: + tree = parse_bracket_exp (regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_BACK_REF: + if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) + { + *err = REG_ESUBREG; + return NULL; + } + dfa->used_bkref_map |= 1 << token->opr.idx; + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + ++dfa->nbackref; + dfa->has_mb_node = 1; + break; + case OP_OPEN_DUP_NUM: + if (syntax & RE_CONTEXT_INVALID_DUP) + { + *err = REG_BADRPT; + return NULL; + } + /* FALLTHROUGH */ + case OP_DUP_ASTERISK: + case OP_DUP_PLUS: + case OP_DUP_QUESTION: + if (syntax & RE_CONTEXT_INVALID_OPS) + { + *err = REG_BADRPT; + return NULL; + } + else if (syntax & RE_CONTEXT_INDEP_OPS) + { + fetch_token (token, regexp, syntax); + return parse_expression (regexp, preg, token, syntax, nest, err); + } + /* else fall through */ + case OP_CLOSE_SUBEXP: + if ((token->type == OP_CLOSE_SUBEXP) && + !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) + { + *err = REG_ERPAREN; + return NULL; + } + /* else fall through */ + case OP_CLOSE_DUP_NUM: + /* We treat it as a normal character. */ + + /* Then we can these characters as normal characters. */ + token->type = CHARACTER; + /* mb_partial and word_char bits should be initialized already + by peek_token. 
*/ + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + break; + case ANCHOR: + if ((token->opr.ctx_type + & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) + && dfa->word_ops_used == 0) + init_word_char (dfa); + if (token->opr.ctx_type == WORD_DELIM + || token->opr.ctx_type == NOT_WORD_DELIM) + { + bin_tree_t *tree_first, *tree_last; + if (token->opr.ctx_type == WORD_DELIM) + { + token->opr.ctx_type = WORD_FIRST; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = WORD_LAST; + } + else + { + token->opr.ctx_type = INSIDE_WORD; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = INSIDE_NOTWORD; + } + tree_last = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree_first, tree_last, OP_ALT); + if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + else + { + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + /* We must return here, since ANCHORs can't be followed + by repetition operators. + eg. RE"^*" is invalid or "", + it must not be "". 
*/ + fetch_token (token, regexp, syntax); + return tree; + case OP_PERIOD: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + if (dfa->mb_cur_max > 1) + dfa->has_mb_node = 1; + break; + case OP_WORD: + case OP_NOTWORD: + tree = build_charclass_op (dfa, regexp->trans, + (const unsigned char *) "alnum", + (const unsigned char *) "_", + token->type == OP_NOTWORD, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_SPACE: + case OP_NOTSPACE: + tree = build_charclass_op (dfa, regexp->trans, + (const unsigned char *) "space", + (const unsigned char *) "", + token->type == OP_NOTSPACE, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_ALT: + case END_OF_RE: + return NULL; + case BACK_SLASH: + *err = REG_EESCAPE; + return NULL; + default: + /* Must not happen? */ +#ifdef DEBUG + assert (0); +#endif + return NULL; + } + fetch_token (token, regexp, syntax); + + while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS + || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) + { + tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + /* In BRE consecutive duplications are not allowed. 
*/ + if ((syntax & RE_CONTEXT_INVALID_DUP) + && (token->type == OP_DUP_ASTERISK + || token->type == OP_OPEN_DUP_NUM)) + { + *err = REG_BADRPT; + return NULL; + } + } + + return tree; +} + +/* This function build the following tree, from regular expression + (): + SUBEXP + | + +*/ + +static bin_tree_t * +parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + size_t cur_nsub; + cur_nsub = preg->re_nsub++; + + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + + /* The subexpression may be a null string. */ + if (token->type == OP_CLOSE_SUBEXP) + tree = NULL; + else + { + tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); + if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) + *err = REG_EPAREN; + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + + if (cur_nsub <= '9' - '1') + dfa->completed_bkref_map |= 1 << cur_nsub; + + tree = create_tree (dfa, tree, NULL, SUBEXP); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + tree->token.opr.idx = cur_nsub; + return tree; +} + +/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ + +static bin_tree_t * +parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) +{ + bin_tree_t *tree = NULL, *old_tree = NULL; + int i, start, end, start_idx = re_string_cur_idx (regexp); + re_token_t start_token = *token; + + if (token->type == OP_OPEN_DUP_NUM) + { + end = 0; + start = fetch_number (regexp, token, syntax); + if (start == -1) + { + if (token->type == CHARACTER && token->opr.c == ',') + start = 0; /* We treat "{,m}" as "{0,m}". */ + else + { + *err = REG_BADBR; /* {} is invalid. */ + return NULL; + } + } + if (BE (start != -2, 1)) + { + /* We treat "{n}" as "{n,n}". */ + end = ((token->type == OP_CLOSE_DUP_NUM) ? 
start + : ((token->type == CHARACTER && token->opr.c == ',') + ? fetch_number (regexp, token, syntax) : -2)); + } + if (BE (start == -2 || end == -2, 0)) + { + /* Invalid sequence. */ + if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) + { + if (token->type == END_OF_RE) + *err = REG_EBRACE; + else + *err = REG_BADBR; + + return NULL; + } + + /* If the syntax bit is set, rollback. */ + re_string_set_index (regexp, start_idx); + *token = start_token; + token->type = CHARACTER; + /* mb_partial and word_char bits should be already initialized by + peek_token. */ + return elem; + } + + if (BE (end != -1 && start > end, 0)) + { + /* First number greater than second. */ + *err = REG_BADBR; + return NULL; + } + } + else + { + start = (token->type == OP_DUP_PLUS) ? 1 : 0; + end = (token->type == OP_DUP_QUESTION) ? 1 : -1; + } + + fetch_token (token, regexp, syntax); + + if (BE (elem == NULL, 0)) + return NULL; + if (BE (start == 0 && end == 0, 0)) + { + postorder (elem, free_tree, NULL); + return NULL; + } + + /* Extract "{n,m}" to "...{0,}". */ + if (BE (start > 0, 0)) + { + tree = elem; + for (i = 2; i <= start; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (start == end) + return tree; + + /* Duplicate ELEM before it is marked optional. */ + elem = duplicate_tree (elem, dfa); + old_tree = tree; + } + else + old_tree = NULL; + + if (elem->token.type == SUBEXP) + postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); + + tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + + /* This loop is actually executed only when end != -1, + to rewrite {0,n} as ((...?)?)?... We have + already created the start+1-th copy. 
*/ + for (i = start + 2; i <= end; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + + tree = create_tree (dfa, tree, NULL, OP_ALT); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (old_tree) + tree = create_tree (dfa, old_tree, tree, CONCAT); + + return tree; + + parse_dup_op_espace: + *err = REG_ESPACE; + return NULL; +} + +/* Size of the names for collating symbol/equivalence_class/character_class. + I'm not sure, but maybe enough. */ +#define BRACKET_NAME_BUF_SIZE 32 + +#ifndef _LIBC + /* Local function for parse_bracket_exp only used in case of NOT _LIBC. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. + RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) +# else /* not RE_ENABLE_I18N */ +build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, + bracket_elem_t *end_elem) +# endif /* not RE_ENABLE_I18N */ +{ + unsigned int start_ch, end_ch; + /* Equivalence Classes and Character Classes can't be a range start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + /* We can handle no multi character collating elements without libc + support. 
*/ + if (BE ((start_elem->type == COLL_SYM + && strlen ((char *) start_elem->opr.name) > 1) + || (end_elem->type == COLL_SYM + && strlen ((char *) end_elem->opr.name) > 1), 0)) + return REG_ECOLLATE; + +# ifdef RE_ENABLE_I18N + { + wchar_t wc; + wint_t start_wc; + wint_t end_wc; + wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + + start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? __btowc (start_ch) : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? __btowc (end_ch) : end_elem->opr.wch); + if (start_wc == WEOF || end_wc == WEOF) + return REG_ECOLLATE; + cmp_buf[0] = start_wc; + cmp_buf[4] = end_wc; + if (wcscoll (cmp_buf, cmp_buf + 4) > 0) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, for !_LIBC we have no collation elements: if the + character set is single byte, the single byte character set + that we build below suffices. parse_bracket_exp passes + no MBCSET if dfa->mb_cur_max == 1. */ + if (mbcset) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + wchar_t *new_array_start, *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. 
*/ + new_array_start = re_realloc (mbcset->range_starts, wchar_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, wchar_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_wc; + mbcset->range_ends[mbcset->nranges++] = end_wc; + } + + /* Build the table for single byte characters. */ + for (wc = 0; wc < SBC_MAX; ++wc) + { + cmp_buf[2] = wc; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + bitset_set (sbcset, wc); + } + } +# else /* not RE_ENABLE_I18N */ + { + unsigned int ch; + start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + if (start_ch > end_ch) + return REG_ERANGE; + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ++ch) + if (start_ch <= ch && ch <= end_ch) + bitset_set (sbcset, ch); + } +# endif /* not RE_ENABLE_I18N */ + return REG_NOERROR; +} +#endif /* not _LIBC */ + +#ifndef _LIBC +/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument since we may update it. 
*/ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + int *coll_sym_alloc, const unsigned char *name) +# else /* not RE_ENABLE_I18N */ +build_collating_symbol (bitset_t sbcset, const unsigned char *name) +# endif /* not RE_ENABLE_I18N */ +{ + size_t name_len = strlen ((const char *) name); + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } +} +#endif /* not _LIBC */ + +/* This function parse bracket expression like "[abc]", "[a-c]", + "[[.a-a.]]" etc. */ + +static bin_tree_t * +parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err) +{ +#ifdef _LIBC + const unsigned char *collseqmb; + const char *collseqwc; + uint32_t nrules; + int32_t table_size; + const int32_t *symb_table; + const unsigned char *extra; + + /* Local function for parse_bracket_exp used in _LIBC environement. + Seek the collating symbol entry correspondings to NAME. + Return the index of the symbol in the SYMB_TABLE. */ + + auto inline int32_t + __attribute ((always_inline)) + seek_collating_symbol_entry (name, name_len) + const unsigned char *name; + size_t name_len; + { + int32_t hash = elem_hash ((const char *) name, name_len); + int32_t elem = hash % table_size; + if (symb_table[2 * elem] != 0) + { + int32_t second = hash % (table_size - 2) + 1; + + do + { + /* First compare the hashing value. */ + if (symb_table[2 * elem] == hash + /* Compare the length of the name. */ + && name_len == extra[symb_table[2 * elem + 1]] + /* Compare the name. */ + && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], + name_len) == 0) + { + /* Yep, this is the entry. */ + break; + } + + /* Next entry. */ + elem += second; + } + while (symb_table[2 * elem] != 0); + } + return elem; + } + + /* Local function for parse_bracket_exp used in _LIBC environment. 
+ Look up the collation sequence value of BR_ELEM. + Return the value if succeeded, UINT_MAX otherwise. */ + + auto inline unsigned int + __attribute ((always_inline)) + lookup_collation_sequence_value (br_elem) + bracket_elem_t *br_elem; + { + if (br_elem->type == SB_CHAR) + { + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + return collseqmb[br_elem->opr.ch]; + else + { + wint_t wc = __btowc (br_elem->opr.ch); + return __collseq_table_lookup (collseqwc, wc); + } + } + else if (br_elem->type == MB_CHAR) + { + if (nrules != 0) + return __collseq_table_lookup (collseqwc, br_elem->opr.wch); + } + else if (br_elem->type == COLL_SYM) + { + size_t sym_name_len = strlen ((char *) br_elem->opr.name); + if (nrules != 0) + { + int32_t elem, idx; + elem = seek_collating_symbol_entry (br_elem->opr.name, + sym_name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + /* Skip the byte sequence of the collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the multibyte collation sequence value. */ + idx += sizeof (unsigned int); + /* Skip the wide char sequence of the collating element. */ + idx += sizeof (unsigned int) * + (1 + *(unsigned int *) (extra + idx)); + /* Return the collation sequence value. */ + return *(unsigned int *) (extra + idx); + } + else if (symb_table[2 * elem] == 0 && sym_name_len == 1) + { + /* No valid character. Match it as a single byte + character. */ + return collseqmb[br_elem->opr.name[0]]; + } + } + else if (sym_name_len == 1) + return collseqmb[br_elem->opr.name[0]]; + } + return UINT_MAX; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. 
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) + re_charset_t *mbcset; + int *range_alloc; + bitset_t sbcset; + bracket_elem_t *start_elem, *end_elem; + { + unsigned int ch; + uint32_t start_collseq; + uint32_t end_collseq; + + /* Equivalence Classes and Character Classes can't be a range + start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + start_collseq = lookup_collation_sequence_value (start_elem); + end_collseq = lookup_collation_sequence_value (end_elem); + /* Check start/end collation sequence values. */ + if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) + return REG_ECOLLATE; + if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, if we have no collation elements, and the character set + is single byte, the single byte character set that we + build below suffices. */ + if (nrules > 0 || dfa->mb_cur_max > 1) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + uint32_t *new_array_start; + uint32_t *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. 
*/ + new_nranges = 2 * mbcset->nranges + 1; + new_array_start = re_realloc (mbcset->range_starts, uint32_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, uint32_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_collseq; + mbcset->range_ends[mbcset->nranges++] = end_collseq; + } + + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ch++) + { + uint32_t ch_collseq; + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + ch_collseq = collseqmb[ch]; + else + ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); + if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) + bitset_set (sbcset, ch); + } + return REG_NOERROR; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument sinse we may update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) + re_charset_t *mbcset; + int *coll_sym_alloc; + bitset_t sbcset; + const unsigned char *name; + { + int32_t elem, idx; + size_t name_len = strlen ((const char *) name); + if (nrules != 0) + { + elem = seek_collating_symbol_entry (name, name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + } + else if (symb_table[2 * elem] == 0 && name_len == 1) + { + /* No valid character, treat it as a normal + character. 
*/ + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + else + return REG_ECOLLATE; + + /* Got valid collation sequence, add it as a new entry. */ + /* Check the space of the arrays. */ + if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->ncoll_syms is 0. */ + int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; + /* Use realloc since mbcset->coll_syms is NULL + if *alloc == 0. */ + int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, + new_coll_sym_alloc); + if (BE (new_coll_syms == NULL, 0)) + return REG_ESPACE; + mbcset->coll_syms = new_coll_syms; + *coll_sym_alloc = new_coll_sym_alloc; + } + mbcset->coll_syms[mbcset->ncoll_syms++] = idx; + return REG_NOERROR; + } + else + { + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + } + } +#endif + + re_token_t br_token; + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; + int equiv_class_alloc = 0, char_class_alloc = 0; +#endif /* not RE_ENABLE_I18N */ + int non_match = 0; + bin_tree_t *work_tree; + int token_len; + int first_round = 1; +#ifdef _LIBC + collseqmb = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules) + { + /* + if (MB_CUR_MAX > 1) + */ + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); + symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_TABLEMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_EXTRAMB); + } +#endif + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL 
|| mbcset == NULL, 0)) +#else + if (BE (sbcset == NULL, 0)) +#endif /* RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_NON_MATCH_LIST) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + non_match = 1; + if (syntax & RE_HAT_LISTS_NOT_NEWLINE) + bitset_set (sbcset, '\n'); + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + } + + /* We treat the first ']' as a normal character. */ + if (token->type == OP_CLOSE_BRACKET) + token->type = CHARACTER; + + while (1) + { + bracket_elem_t start_elem, end_elem; + unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; + unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; + reg_errcode_t ret; + int token_len2 = 0, is_range_exp = 0; + re_token_t token2; + + start_elem.opr.name = start_name_buf; + ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, + syntax, first_round); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + first_round = 0; + + /* Get information about the next token. We need it in any case. */ + token_len = peek_token_bracket (token, regexp, syntax); + + /* Do not check for ranges if we know they are not allowed. */ + if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) + { + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CHARSET_RANGE) + { + re_string_skip_bytes (regexp, token_len); /* Skip '-'. 
*/ + token_len2 = peek_token_bracket (&token2, regexp, syntax); + if (BE (token2.type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token2.type == OP_CLOSE_BRACKET) + { + /* We treat the last '-' as a normal character. */ + re_string_skip_bytes (regexp, -token_len); + token->type = CHARACTER; + } + else + is_range_exp = 1; + } + } + + if (is_range_exp == 1) + { + end_elem.opr.name = end_name_buf; + ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, + dfa, syntax, 1); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + + token_len = peek_token_bracket (token, regexp, syntax); + +#ifdef _LIBC + *err = build_range_exp (sbcset, mbcset, &range_alloc, + &start_elem, &end_elem); +#else +# ifdef RE_ENABLE_I18N + *err = build_range_exp (sbcset, + dfa->mb_cur_max > 1 ? mbcset : NULL, + &range_alloc, &start_elem, &end_elem); +# else + *err = build_range_exp (sbcset, &start_elem, &end_elem); +# endif +#endif /* RE_ENABLE_I18N */ + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + } + else + { + switch (start_elem.type) + { + case SB_CHAR: + bitset_set (sbcset, start_elem.opr.ch); + break; +#ifdef RE_ENABLE_I18N + case MB_CHAR: + /* Check whether the array has enough space. */ + if (BE (mbchar_alloc == mbcset->nmbchars, 0)) + { + wchar_t *new_mbchars; + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nmbchars is 0. */ + mbchar_alloc = 2 * mbcset->nmbchars + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + new_mbchars = re_realloc (mbcset->mbchars, wchar_t, + mbchar_alloc); + if (BE (new_mbchars == NULL, 0)) + goto parse_bracket_exp_espace; + mbcset->mbchars = new_mbchars; + } + mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; + break; +#endif /* RE_ENABLE_I18N */ + case EQUIV_CLASS: + *err = build_equiv_class (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &equiv_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case COLL_SYM: + *err = build_collating_symbol (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &coll_sym_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case CHAR_CLASS: + *err = build_charclass (regexp->trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &char_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name, syntax); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + default: + assert (0); + break; + } + } + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CLOSE_BRACKET) + break; + } + + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); + + if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes + || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes + || mbcset->non_match))) + { + bin_tree_t *mbc_tree; + int sbc_idx; + /* Build a tree for complex bracket. 
*/ + dfa->has_mb_node = 1; + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto parse_bracket_exp_espace; + for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) + if (sbcset[sbc_idx]) + break; + /* If there are no bits set in sbcset, there is no point + of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ + if (sbc_idx < BITSET_WORDS) + { + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + + /* Then join them by ALT node. */ + work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + else + { + re_free (sbcset); + work_tree = mbc_tree; + } + } + else +#endif /* not RE_ENABLE_I18N */ + { +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + return work_tree; + + parse_bracket_exp_espace: + *err = REG_ESPACE; + parse_bracket_exp_free_return: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + return NULL; +} + +/* Parse an element in the bracket expression. 
*/ + +static reg_errcode_t +parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token, int token_len, re_dfa_t *dfa, + reg_syntax_t syntax, int accept_hyphen) +{ +#ifdef RE_ENABLE_I18N + int cur_char_size; + cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); + if (cur_char_size > 1) + { + elem->type = MB_CHAR; + elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); + re_string_skip_bytes (regexp, cur_char_size); + return REG_NOERROR; + } +#endif /* RE_ENABLE_I18N */ + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS + || token->type == OP_OPEN_EQUIV_CLASS) + return parse_bracket_symbol (elem, regexp, token); + if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) + { + /* A '-' must only appear as anything but a range indicator before + the closing bracket. Everything else is an error. */ + re_token_t token2; + (void) peek_token_bracket (&token2, regexp, syntax); + if (token2.type != OP_CLOSE_BRACKET) + /* The actual error value is not standardized since this whole + case is undefined. But ERANGE makes good sense. */ + return REG_ERANGE; + } + elem->type = SB_CHAR; + elem->opr.ch = token->opr.c; + return REG_NOERROR; +} + +/* Parse a bracket symbol in the bracket expression. Bracket symbols are + such as [::], [..], and + [==]. 
*/ + +static reg_errcode_t +parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token) +{ + unsigned char ch, delim = token->opr.c; + int i = 0; + if (re_string_eoi(regexp)) + return REG_EBRACK; + for (;; ++i) + { + if (i >= BRACKET_NAME_BUF_SIZE) + return REG_EBRACK; + if (token->type == OP_OPEN_CHAR_CLASS) + ch = re_string_fetch_byte_case (regexp); + else + ch = re_string_fetch_byte (regexp); + if (re_string_eoi(regexp)) + return REG_EBRACK; + if (ch == delim && re_string_peek_byte (regexp, 0) == ']') + break; + elem->opr.name[i] = ch; + } + re_string_skip_bytes (regexp, 1); + elem->opr.name[i] = '\0'; + switch (token->type) + { + case OP_OPEN_COLL_ELEM: + elem->type = COLL_SYM; + break; + case OP_OPEN_EQUIV_CLASS: + elem->type = EQUIV_CLASS; + break; + case OP_OPEN_CHAR_CLASS: + elem->type = CHAR_CLASS; + break; + default: + break; + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the equivalence class which is represented by NAME. + The result are written to MBCSET and SBCSET. + EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, + int *equiv_class_alloc, const unsigned char *name) +#else /* not RE_ENABLE_I18N */ +build_equiv_class (bitset_t sbcset, const unsigned char *name) +#endif /* not RE_ENABLE_I18N */ +{ +#ifdef _LIBC + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + const int32_t *table, *indirect; + const unsigned char *weights, *extra, *cp; + unsigned char char_buf[2]; + int32_t idx1, idx2; + unsigned int ch; + size_t len; + /* This #include defines a local function! */ +# include + /* Calculate the index for equivalence class. 
*/ + cp = name; + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + idx1 = findidx (&cp); + if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) + /* This isn't a valid character. */ + return REG_ECOLLATE; + + /* Build single byte matcing table for this equivalence class. */ + char_buf[1] = (unsigned char) '\0'; + len = weights[idx1 & 0xffffff]; + for (ch = 0; ch < SBC_MAX; ++ch) + { + char_buf[0] = ch; + cp = char_buf; + idx2 = findidx (&cp); +/* + idx2 = table[ch]; +*/ + if (idx2 == 0) + /* This isn't a valid character. */ + continue; + /* Compare only if the length matches and the collation rule + index is the same. */ + if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)) + { + int cnt = 0; + + while (cnt <= len && + weights[(idx1 & 0xffffff) + 1 + cnt] + == weights[(idx2 & 0xffffff) + 1 + cnt]) + ++cnt; + + if (cnt > len) + bitset_set (sbcset, ch); + } + } + /* Check whether the array has enough space. */ + if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nequiv_classes is 0. */ + int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; + /* Use realloc since the array is NULL if *alloc == 0. 
*/ + int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, + int32_t, + new_equiv_class_alloc); + if (BE (new_equiv_classes == NULL, 0)) + return REG_ESPACE; + mbcset->equiv_classes = new_equiv_classes; + *equiv_class_alloc = new_equiv_class_alloc; + } + mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; + } + else +#endif /* _LIBC */ + { + if (BE (strlen ((const char *) name) != 1, 0)) + return REG_ECOLLATE; + bitset_set (sbcset, *name); + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the character class which is represented by NAME. + The result are written to MBCSET and SBCSET. + CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + re_charset_t *mbcset, int *char_class_alloc, + const unsigned char *class_name, reg_syntax_t syntax) +#else /* not RE_ENABLE_I18N */ +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + const unsigned char *class_name, reg_syntax_t syntax) +#endif /* not RE_ENABLE_I18N */ +{ + int i; + const char *name = (const char *) class_name; + + /* In case of REG_ICASE "upper" and "lower" match the both of + upper and lower cases. */ + if ((syntax & RE_ICASE) + && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) + name = "alpha"; + +#ifdef RE_ENABLE_I18N + /* Check the space of the arrays. */ + if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nchar_classes is 0. */ + int new_char_class_alloc = 2 * mbcset->nchar_classes + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, + new_char_class_alloc); + if (BE (new_char_classes == NULL, 0)) + return REG_ESPACE; + mbcset->char_classes = new_char_classes; + *char_class_alloc = new_char_class_alloc; + } + mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); +#endif /* RE_ENABLE_I18N */ + +#define BUILD_CHARCLASS_LOOP(ctype_func) \ + do { \ + if (BE (trans != NULL, 0)) \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, trans[i]); \ + } \ + else \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, i); \ + } \ + } while (0) + + if (strcmp (name, "alnum") == 0) + BUILD_CHARCLASS_LOOP (isalnum); + else if (strcmp (name, "cntrl") == 0) + BUILD_CHARCLASS_LOOP (iscntrl); + else if (strcmp (name, "lower") == 0) + BUILD_CHARCLASS_LOOP (islower); + else if (strcmp (name, "space") == 0) + BUILD_CHARCLASS_LOOP (isspace); + else if (strcmp (name, "alpha") == 0) + BUILD_CHARCLASS_LOOP (isalpha); + else if (strcmp (name, "digit") == 0) + BUILD_CHARCLASS_LOOP (isdigit); + else if (strcmp (name, "print") == 0) + BUILD_CHARCLASS_LOOP (isprint); + else if (strcmp (name, "upper") == 0) + BUILD_CHARCLASS_LOOP (isupper); + else if (strcmp (name, "blank") == 0) + BUILD_CHARCLASS_LOOP (isblank); + else if (strcmp (name, "graph") == 0) + BUILD_CHARCLASS_LOOP (isgraph); + else if (strcmp (name, "punct") == 0) + BUILD_CHARCLASS_LOOP (ispunct); + else if (strcmp (name, "xdigit") == 0) + BUILD_CHARCLASS_LOOP (isxdigit); + else + return REG_ECTYPE; + + return REG_NOERROR; +} + +static bin_tree_t * +build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, + const unsigned char *class_name, + const unsigned char *extra, int non_match, + reg_errcode_t *err) +{ + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int alloc = 0; +#endif /* not RE_ENABLE_I18N */ + reg_errcode_t ret; + re_token_t br_token; + bin_tree_t *tree; + + sbcset = 
(re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ + +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else /* not RE_ENABLE_I18N */ + if (BE (sbcset == NULL, 0)) +#endif /* not RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + if (non_match) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + } + + /* We don't care the syntax in this case. */ + ret = build_charclass (trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &alloc, +#endif /* RE_ENABLE_I18N */ + class_name, 0); + + if (BE (ret != REG_NOERROR, 0)) + { + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = ret; + return NULL; + } + /* \w match '_' also. */ + for (; *extra; extra++) + bitset_set (sbcset, *extra); + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); +#endif + + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (tree == NULL, 0)) + goto build_word_op_espace; + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + bin_tree_t *mbc_tree; + /* Build a tree for complex bracket. */ + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + dfa->has_mb_node = 1; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto build_word_op_espace; + /* Then join them by ALT node. 
*/ + tree = create_tree (dfa, tree, mbc_tree, OP_ALT); + if (BE (mbc_tree != NULL, 1)) + return tree; + } + else + { + free_charset (mbcset); + return tree; + } +#else /* not RE_ENABLE_I18N */ + return tree; +#endif /* not RE_ENABLE_I18N */ + + build_word_op_espace: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = REG_ESPACE; + return NULL; +} + +/* This is intended for the expressions like "a{1,3}". + Fetch a number from `input', and return the number. + Return -1, if the number field is empty like "{,1}". + Return -2, If an error is occured. */ + +static int +fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) +{ + int num = -1; + unsigned char c; + while (1) + { + fetch_token (token, input, syntax); + c = token->opr.c; + if (BE (token->type == END_OF_RE, 0)) + return -2; + if (token->type == OP_CLOSE_DUP_NUM || c == ',') + break; + num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) + ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); + num = (num > RE_DUP_MAX) ? -2 : num; + } + return num; +} + +#ifdef RE_ENABLE_I18N +static void +free_charset (re_charset_t *cset) +{ + re_free (cset->mbchars); +# ifdef _LIBC + re_free (cset->coll_syms); + re_free (cset->equiv_classes); + re_free (cset->range_starts); + re_free (cset->range_ends); +# endif + re_free (cset->char_classes); + re_free (cset); +} +#endif /* RE_ENABLE_I18N */ + +/* Functions for binary tree operation. */ + +/* Create a tree node. 
*/ + +static bin_tree_t * +create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + re_token_type_t type) +{ + re_token_t t; + t.type = type; + return create_token_tree (dfa, left, right, &t); +} + +static bin_tree_t * +create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + const re_token_t *token) +{ + bin_tree_t *tree; + if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) + { + bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); + + if (storage == NULL) + return NULL; + storage->next = dfa->str_tree_storage; + dfa->str_tree_storage = storage; + dfa->str_tree_storage_idx = 0; + } + tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++]; + + tree->parent = NULL; + tree->left = left; + tree->right = right; + tree->token = *token; + tree->token.duplicated = 0; + tree->token.opt_subexp = 0; + tree->first = NULL; + tree->next = NULL; + tree->node_idx = -1; + + if (left != NULL) + left->parent = tree; + if (right != NULL) + right->parent = tree; + return tree; +} + +/* Mark the tree SRC as an optional subexpression. + To be called from preorder or postorder. */ + +static reg_errcode_t +mark_opt_subexp (void *extra, bin_tree_t *node) +{ + int idx = (int) (long) extra; + if (node->token.type == SUBEXP && node->token.opr.idx == idx) + node->token.opt_subexp = 1; + + return REG_NOERROR; +} + +/* Free the allocated memory inside NODE. */ + +static void +free_token (re_token_t *node) +{ +#ifdef RE_ENABLE_I18N + if (node->type == COMPLEX_BRACKET && node->duplicated == 0) + free_charset (node->opr.mbcset); + else +#endif /* RE_ENABLE_I18N */ + if (node->type == SIMPLE_BRACKET && node->duplicated == 0) + re_free (node->opr.sbcset); +} + +/* Worker function for tree walking. Free the allocated memory inside NODE + and its children. */ + +static reg_errcode_t +free_tree (void *extra, bin_tree_t *node) +{ + free_token (&node->token); + return REG_NOERROR; +} + + +/* Duplicate the node SRC, and return new node. 
This is a preorder + visit similar to the one implemented by the generic visitor, but + we need more infrastructure to maintain two parallel trees --- so, + it's easier to duplicate. + Returns NULL as soon as a node cannot be allocated. Every copied + token is flagged as `duplicated'; the copy's parent link is taken + from ROOT's own parent. */ + +static bin_tree_t * +duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) +{ + const bin_tree_t *node; + bin_tree_t *dup_root; + bin_tree_t **p_new = &dup_root, *dup_node = root->parent; + + for (node = root; ; ) + { + /* Create a new tree and link it back to the current parent. */ + *p_new = create_token_tree (dfa, NULL, NULL, &node->token); + if (*p_new == NULL) + return NULL; + (*p_new)->parent = dup_node; + (*p_new)->token.duplicated = 1; + dup_node = *p_new; + + /* Go to the left node, or up and to the right. */ + if (node->left) + { + node = node->left; + p_new = &dup_node->left; + } + else + { + const bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + dup_node = dup_node->parent; + if (!node) + return dup_root; + } + node = node->right; + p_new = &dup_node->right; + } + } +} diff --git a/gnu_regex/.svn/text-base/regex.c.svn-base b/gnu_regex/.svn/text-base/regex.c.svn-base new file mode 100644 index 0000000..bec9f9d --- /dev/null +++ b/gnu_regex/.svn/text-base/regex.c.svn-base @@ -0,0 +1,74 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* Make sure no one compiles this code with a C++ compiler. */ +#ifdef __cplusplus +# error "This is C code, use a C compiler" +#endif + +#ifdef _LIBC +/* We have to keep the namespace clean. */ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +# include "../locale/localeinfo.h" +#endif + +/* On some systems, limits.h sets RE_DUP_MAX to a lower value than + GNU regex allows. Include it before <regex.h>, which correctly + #undefs RE_DUP_MAX and sets it to the right value. */ +#include + +#include "regex.h" +#include "regex_internal.h" + +#include "regex_internal.c" +#include "regcomp.c" +#include "regexec.c" + +/* Binary backward compatibility.
*/ +#if _LIBC +# include <shlib-compat.h> +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) +link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.") +int re_max_failures = 2000; +# endif +#endif diff --git a/gnu_regex/.svn/text-base/regex.h.svn-base b/gnu_regex/.svn/text-base/regex.h.svn-base new file mode 100644 index 0000000..2132772 --- /dev/null +++ b/gnu_regex/.svn/text-base/regex.h.svn-base @@ -0,0 +1,575 @@ +/* Definitions for data structures and routines for the regular + expression library. + Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_H +#define _REGEX_H 1 + +#include <sys/types.h> + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; + +/* The following bits are used to determine the regexp syntax we + recognize.
The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned long int reg_syntax_t; + +#ifdef __USE_GNU +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. 
Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. */ +# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +# define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \<digit> matches <digit>. + If not set, then \<digit> is a back-reference. */ +# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal.
*/ +# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, turn on internal regex debugging. + If not set, and debugging was on, turn it off. + This only works if regex.c is compiled -DDEBUG. + We define this bit always, so that all that's needed to turn on + debugging is to recompile regex.c; the calling code can always have + this bit set, and it won't affect anything in the normal case. */ +# define RE_DEBUG (RE_NO_GNU_OPS << 1) + +/* If this bit is set, a syntactically invalid interval is treated as + a string of ordinary characters. For example, the ERE 'a{1' is + treated as 'a\{1'. */ +# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) + +/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only + for ^, because it is difficult to scan the regex backwards to find + whether ^ should be special. */ +# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) + +/* If this bit is set, then \{ cannot be first in an bre or + immediately after an alternation or begin-group operator. 
*/ +# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) + +/* If this bit is set, then no_sub will be set to 1 during + re_compile_pattern. */ +# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) +#endif + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +#ifdef __USE_GNU +/* Define combinations of the above bits for the standard possibilities. + (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_INVALID_INTERVAL_ORD) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. 
*/ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is + removed and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +# ifdef RE_DUP_MAX +# undef RE_DUP_MAX +# endif +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +# define RE_DUP_MAX (0x7fff) +#endif + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. 
*/ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). + If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + +/* Use PMATCH[0] to delimit the start and end of the search in the + buffer. */ +#define REG_STARTEND (1 << 2) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ +#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Invalid collating element. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern.
Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +#ifndef RE_TRANSLATE_TYPE +# define __RE_TRANSLATE_TYPE unsigned char * +# ifdef __USE_GNU +# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE +# endif +#endif + +#ifdef __USE_GNU +# define __REPB_PREFIX(name) name +#else +# define __REPB_PREFIX(name) __##name +#endif + +struct re_pattern_buffer +{ + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are sometimes used as + array indexes. */ + unsigned char *__REPB_PREFIX(buffer); + + /* Number of bytes to which `buffer' points. */ + unsigned long int __REPB_PREFIX(allocated); + + /* Number of bytes actually used in `buffer'. */ + unsigned long int __REPB_PREFIX(used); + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t __REPB_PREFIX(syntax); + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses the + fastmap, if there is one, to skip over impossible starting points + for matches. */ + char *__REPB_PREFIX(fastmap); + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation is + applied to a pattern when it is compiled and to a string when it + is matched. */ + __RE_TRANSLATE_TYPE __REPB_PREFIX(translate); + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see whether or + not we should use the fastmap, so we don't set this absolutely + perfectly; see `re_compile_fastmap' (the `duplicate' case). */ + unsigned __REPB_PREFIX(can_be_null) : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. 
+ If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#ifdef __USE_GNU +# define REGS_UNALLOCATED 0 +# define REGS_REALLOCATE 1 +# define REGS_FIXED 2 +#endif + unsigned __REPB_PREFIX(regs_allocated) : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned __REPB_PREFIX(fastmap_accurate) : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned __REPB_PREFIX(no_sub) : 1; + + /* If set, a beginning-of-line anchor doesn't match at the beginning + of the string. */ + unsigned __REPB_PREFIX(not_bol) : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned __REPB_PREFIX(not_eol) : 1; + + /* If true, an anchor at a newline matches. */ + unsigned __REPB_PREFIX(newline_anchor) : 1; +}; + +typedef struct re_pattern_buffer regex_t; + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +#ifdef __USE_GNU +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +# ifndef RE_NREGS +# define RE_NREGS 30 +# endif +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +#ifdef __USE_GNU +/* Sets the current default syntax to SYNTAX, and return the old syntax. 
+ You can also simply assign to the `re_syntax_options' variable. */ +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, int __range, + struct re_registers *__regs); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + int __range, struct re_registers *__regs, int __stop); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, struct re_registers *__regs); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. 
*/ +extern int re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + struct re_registers *__regs, int __stop); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); +#endif /* Use GNU */ + +#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD) +# ifndef _CRAY +/* 4.2 bsd compatibility. */ +extern char *re_comp (const char *); +extern int re_exec (const char *); +# endif +#endif + +/* GCC 2.95 and later have "__restrict"; C99 compilers have + "restrict", and "configure" may have defined "restrict". */ +#ifndef __restrict +# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) +# if defined restrict || 199901L <= __STDC_VERSION__ +# define __restrict restrict +# else +# define __restrict +# endif +# endif +#endif +/* gcc 3.1 and up support the [restrict] syntax. */ +#ifndef __restrict_arr +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \ + && !defined __GNUG__ +# define __restrict_arr __restrict +# else +# define __restrict_arr +# endif +#endif + +/* POSIX compatibility. 
*/ +extern int regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); + +extern int regexec (const regex_t *__restrict __preg, + const char *__restrict __string, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); + +extern size_t regerror (int __errcode, const regex_t *__restrict __preg, + char *__restrict __errbuf, size_t __errbuf_size); + +extern void regfree (regex_t *__preg); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* regex.h */ diff --git a/gnu_regex/.svn/text-base/regex_internal.c.svn-base b/gnu_regex/.svn/text-base/regex_internal.c.svn-base new file mode 100644 index 0000000..c9da2b9 --- /dev/null +++ b/gnu_regex/.svn/text-base/regex_internal.c.svn-base @@ -0,0 +1,1713 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +static void re_string_construct_common (const char *str, int len, + re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) internal_function; +static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int hash) internal_function; +static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int context, + unsigned int hash) internal_function; + +/* Functions for string operation. */ + +/* This function allocate the buffers. It is necessary to call + re_string_reconstruct before using the object. */ + +static reg_errcode_t +internal_function +re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + int init_buf_len; + + /* Ensure at least one character fits into the buffers. */ + if (init_len < dfa->mb_cur_max) + init_len = dfa->mb_cur_max; + init_buf_len = (len + 1 < init_len) ? len + 1: init_len; + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + ret = re_string_realloc_buffers (pstr, init_buf_len); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + pstr->word_char = dfa->word_char; + pstr->word_ops_used = dfa->word_ops_used; + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; + pstr->valid_raw_len = pstr->valid_len; + return REG_NOERROR; +} + +/* This function allocate the buffers, and initialize them. 
*/ + +static reg_errcode_t +internal_function +re_string_construct (re_string_t *pstr, const char *str, int len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + memset (pstr, '\0', sizeof (re_string_t)); + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + if (len > 0) + { + ret = re_string_realloc_buffers (pstr, len + 1); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + + if (icase) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + if (pstr->valid_raw_len >= len) + break; + if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) + break; + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (trans != NULL) + re_string_translate_buffer (pstr); + else + { + pstr->valid_len = pstr->bufs_len; + pstr->valid_raw_len = pstr->bufs_len; + } + } + } + + return REG_NOERROR; +} + +/* Helper functions for re_string_allocate, and re_string_construct. 
*/ + +static reg_errcode_t +internal_function +re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) +{ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); + if (BE (new_wcs == NULL, 0)) + return REG_ESPACE; + pstr->wcs = new_wcs; + if (pstr->offsets != NULL) + { + int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len); + if (BE (new_offsets == NULL, 0)) + return REG_ESPACE; + pstr->offsets = new_offsets; + } + } +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + { + unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, + new_buf_len); + if (BE (new_mbs == NULL, 0)) + return REG_ESPACE; + pstr->mbs = new_mbs; + } + pstr->bufs_len = new_buf_len; + return REG_NOERROR; +} + + +static void +internal_function +re_string_construct_common (const char *str, int len, re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) +{ + pstr->raw_mbs = (const unsigned char *) str; + pstr->len = len; + pstr->raw_len = len; + pstr->trans = trans; + pstr->icase = icase ? 1 : 0; + pstr->mbs_allocated = (trans != NULL || icase); + pstr->mb_cur_max = dfa->mb_cur_max; + pstr->is_utf8 = dfa->is_utf8; + pstr->map_notascii = dfa->map_notascii; + pstr->stop = pstr->len; + pstr->raw_stop = pstr->stop; +} + +#ifdef RE_ENABLE_I18N + +/* Build wide character buffer PSTR->WCS. + If the byte sequence of the string are: + <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3> + Then wide character buffer will be: + <wc1> , WEOF , <wc2> , WEOF , <wc3> + We use WEOF for padding, they indicate that the position isn't + a first byte of a multibyte character. + + Note that this function assumes PSTR->VALID_LEN elements are already + built and starts from PSTR->VALID_LEN.
*/ + +static void +internal_function +build_wcs_buffer (re_string_t *pstr) +{ +#ifdef _LIBC + unsigned char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + unsigned char buf[64]; +#endif + mbstate_t prev_st; + int byte_idx, end_idx, remain_len; + size_t mbclen; + + /* Build the buffers from pstr->valid_len to either pstr->len or + pstr->bufs_len. */ + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + for (byte_idx = pstr->valid_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + /* Apply the translation if we need. */ + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i]; + buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; + mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2, 0)) + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a singlebyte character. */ + mbclen = 1; + wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + if (BE (pstr->trans != NULL, 0)) + wc = pstr->trans[wc]; + pstr->cur_state = prev_st; + } + + /* Write wide character and padding. */ + pstr->wcs[byte_idx++] = wc; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; +} + +/* Build wide character buffer PSTR->WCS like build_wcs_buffer, + but for REG_ICASE. 
*/ + +static reg_errcode_t +internal_function +build_wcs_upper_buffer (re_string_t *pstr) +{ + mbstate_t prev_st; + int src_idx, byte_idx, end_idx, remain_len; + size_t mbclen; +#ifdef _LIBC + char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + char buf[64]; +#endif + + byte_idx = pstr->valid_len; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + /* The following optimization assumes that ASCII characters can be + mapped to wide characters with a simple cast. */ + if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed) + { + while (byte_idx < end_idx) + { + wchar_t wc; + + if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) + && mbsinit (&pstr->cur_state)) + { + /* In case of a singlebyte character. */ + pstr->mbs[byte_idx] + = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); + /* The next step uses the assumption that wchar_t is encoded + ASCII-safe: all ASCII values can be converted like this. */ + pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; + ++byte_idx; + continue; + } + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + mbclen = __mbrtowc (&wc, + ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + + byte_idx), remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb (buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else + { + src_idx = byte_idx; + goto offsets_needed; + } + } + else + memcpy (pstr->mbs + byte_idx, + pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte. 
*/ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + pstr->mbs[byte_idx] = ch; + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; + return REG_NOERROR; + } + else + for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + offsets_needed: + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i]; + buf[i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; + mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else if (mbcdlen != (size_t) -1) + { + size_t i; + + if (byte_idx + mbcdlen > pstr->bufs_len) + { + pstr->cur_state = prev_st; + break; + } + + if (pstr->offsets == NULL) + { + pstr->offsets = re_malloc (int, pstr->bufs_len); + + if (pstr->offsets == NULL) + return REG_ESPACE; + } + if (!pstr->offsets_needed) + { + for (i = 0; i < (size_t) byte_idx; ++i) + pstr->offsets[i] = i; + pstr->offsets_needed = 1; + } + + memcpy (pstr->mbs + byte_idx, buf, mbcdlen); + pstr->wcs[byte_idx] = wcu; + pstr->offsets[byte_idx] = src_idx; + for (i = 1; i < mbcdlen; ++i) + { + pstr->offsets[byte_idx + i] + = src_idx + (i < mbclen ? 
i : mbclen - 1); + pstr->wcs[byte_idx + i] = WEOF; + } + pstr->len += mbcdlen - mbclen; + if (pstr->raw_stop > src_idx) + pstr->stop += mbcdlen - mbclen; + end_idx = (pstr->bufs_len > pstr->len) + ? pstr->len : pstr->bufs_len; + byte_idx += mbcdlen; + src_idx += mbclen; + continue; + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + + if (BE (pstr->offsets_needed != 0, 0)) + { + size_t i; + for (i = 0; i < mbclen; ++i) + pstr->offsets[byte_idx + i] = src_idx + i; + } + src_idx += mbclen; + + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte. */ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; + + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans [ch]; + pstr->mbs[byte_idx] = ch; + + if (BE (pstr->offsets_needed != 0, 0)) + pstr->offsets[byte_idx] = src_idx; + ++src_idx; + + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = src_idx; + return REG_NOERROR; +} + +/* Skip whole characters of the raw input, starting at + PSTR->RAW_MBS_IDX + PSTR->VALID_RAW_LEN, until the raw index is no + longer below NEW_RAW_IDX. Store the last character skipped in + *LAST_WC (WEOF if nothing was skipped) and return the raw index + reached. Bytes that do not convert are skipped one at a time. */ + +static int +internal_function +re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) +{ + mbstate_t prev_st; + int rawbuf_idx; + size_t mbclen; + wchar_t wc = WEOF; + + /* Skip the characters which are not necessary to check. 
*/ + for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len; + rawbuf_idx < new_raw_idx;) + { + int remain_len; + /* RAWBUF_IDX is an absolute index into RAW_MBS, so the bytes left + must be measured against RAW_LEN; PSTR->LEN is relative to + RAW_MBS_IDX and would under-count them. */ + remain_len = pstr->raw_len - rawbuf_idx; + prev_st = pstr->cur_state; + mbclen = __mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx, + remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a single byte character. */ + if (mbclen == 0 || remain_len == 0) + wc = L'\0'; + else + wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx); + mbclen = 1; + pstr->cur_state = prev_st; + } + /* Then proceed the next character. */ + rawbuf_idx += mbclen; + } + *last_wc = (wint_t) wc; + return rawbuf_idx; +} +#endif /* RE_ENABLE_I18N */ + +/* Build the buffer PSTR->MBS, and apply the translation if we need. + This function is used in case of REG_ICASE. */ + +static void +internal_function +build_upper_buffer (re_string_t *pstr) +{ + int char_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans[ch]; + if (islower (ch)) + pstr->mbs[char_idx] = toupper (ch); + else + pstr->mbs[char_idx] = ch; + } + pstr->valid_len = char_idx; + pstr->valid_raw_len = char_idx; +} + +/* Apply TRANS to the buffer in PSTR. */ + +static void +internal_function +re_string_translate_buffer (re_string_t *pstr) +{ + int buf_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; + pstr->mbs[buf_idx] = pstr->trans[ch]; + } + + pstr->valid_len = buf_idx; + pstr->valid_raw_len = buf_idx; +} + +/* This function reconstructs the buffers. 
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1, + convert to upper case in case of REG_ICASE, apply translation. */ + +static reg_errcode_t +internal_function +re_string_reconstruct (re_string_t *pstr, int idx, int eflags) +{ + int offset = idx - pstr->raw_mbs_idx; + if (BE (offset < 0, 0)) + { + /* Reset buffer. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); +#endif /* RE_ENABLE_I18N */ + pstr->len = pstr->raw_len; + pstr->stop = pstr->raw_stop; + pstr->valid_len = 0; + pstr->raw_mbs_idx = 0; + pstr->valid_raw_len = 0; + pstr->offsets_needed = 0; + pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF); + if (!pstr->mbs_allocated) + pstr->mbs = (unsigned char *) pstr->raw_mbs; + offset = idx; + } + + if (BE (offset != 0, 1)) + { + /* Should the already checked characters be kept? */ + if (BE (offset < pstr->valid_raw_len, 1)) + { + /* Yes, move them to the front of the buffer. */ +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + int low = 0, high = pstr->valid_len, mid; + do + { + mid = (high + low) / 2; + if (pstr->offsets[mid] > offset) + high = mid; + else if (pstr->offsets[mid] < offset) + low = mid + 1; + else + break; + } + while (low < high); + if (pstr->offsets[mid] < offset) + ++mid; + pstr->tip_context = re_string_context_at (pstr, mid - 1, + eflags); + /* This can be quite complicated, so handle specially + only the common and easy case where the character with + different length representation of lower and upper + case is present at or after offset. 
*/ + if (pstr->valid_len > offset + && mid == offset && pstr->offsets[mid] == offset) + { + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); + memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; + for (low = 0; low < pstr->valid_len; low++) + pstr->offsets[low] = pstr->offsets[low + offset] - offset; + } + else + { + /* Otherwise, just find out how long the partial multibyte + character at offset is and fill it with WEOF/255. */ + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + while (mid > 0 && pstr->offsets[mid - 1] == offset) + --mid; + while (mid < pstr->valid_len) + if (pstr->wcs[mid] != WEOF) + break; + else + ++mid; + if (mid == pstr->valid_len) + pstr->valid_len = 0; + else + { + pstr->valid_len = pstr->offsets[mid] - offset; + if (pstr->valid_len) + { + for (low = 0; low < pstr->valid_len; ++low) + pstr->wcs[low] = WEOF; + memset (pstr->mbs, 255, pstr->valid_len); + } + } + pstr->valid_raw_len = pstr->valid_len; + } + } + else +#endif + { + pstr->tip_context = re_string_context_at (pstr, offset - 1, + eflags); +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + memmove (pstr->mbs, pstr->mbs + offset, + pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; +#if DEBUG + assert (pstr->valid_len > 0); +#endif + } + } + else + { + /* No, skip all characters until IDX. 
*/ + int prev_valid_len = pstr->valid_len; + +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + } +#endif + pstr->valid_len = 0; +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + int wcs_idx; + wint_t wc = WEOF; + + if (pstr->is_utf8) + { + const unsigned char *raw, *p, *q, *end; + + /* Special case UTF-8. Multi-byte chars start with any + byte other than 0x80 - 0xbf. */ + raw = pstr->raw_mbs + pstr->raw_mbs_idx; + end = raw + (offset - pstr->mb_cur_max); + if (end < pstr->raw_mbs) + end = pstr->raw_mbs; + p = raw + offset - 1; +#ifdef _LIBC + /* We know the wchar_t encoding is UCS4, so for the simple + case, ASCII characters, skip the conversion step. */ + if (isascii (*p) && BE (pstr->trans == NULL, 1)) + { + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + /* pstr->valid_len = 0; */ + wc = (wchar_t) *p; + } + else +#endif + for (; p >= end; --p) + if ((*p & 0xc0) != 0x80) + { + mbstate_t cur_state; + wchar_t wc2; + int mlen = raw + pstr->len - p; + unsigned char buf[6]; + size_t mbclen; + + q = p; + if (BE (pstr->trans != NULL, 0)) + { + int i = mlen < 6 ? mlen : 6; + while (--i >= 0) + buf[i] = pstr->trans[p[i]]; + q = buf; + } + /* XXX Don't use mbrtowc, we know which conversion + to use (UTF-8 -> UCS4). */ + memset (&cur_state, 0, sizeof (cur_state)); + mbclen = __mbrtowc (&wc2, (const char *) p, mlen, + &cur_state); + if (raw + offset - p <= mbclen + && mbclen < (size_t) -2) + { + memset (&pstr->cur_state, '\0', + sizeof (mbstate_t)); + pstr->valid_len = mbclen - (raw + offset - p); + wc = wc2; + } + break; + } + } + + if (wc == WEOF) + pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; + if (wc == WEOF) + pstr->tip_context + = re_string_context_at (pstr, prev_valid_len - 1, eflags); + else + pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + && IS_WIDE_WORD_CHAR (wc)) + ? 
CONTEXT_WORD + : ((IS_WIDE_NEWLINE (wc) + && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + if (BE (pstr->valid_len, 0)) + { + for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) + pstr->wcs[wcs_idx] = WEOF; + if (pstr->mbs_allocated) + memset (pstr->mbs, 255, pstr->valid_len); + } + pstr->valid_raw_len = pstr->valid_len; + } + else +#endif /* RE_ENABLE_I18N */ + { + int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; + pstr->valid_raw_len = 0; + if (pstr->trans) + c = pstr->trans[c]; + pstr->tip_context = (bitset_contain (pstr->word_char, c) + ? CONTEXT_WORD + : ((IS_NEWLINE (c) && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + } + } + if (!BE (pstr->mbs_allocated, 0)) + pstr->mbs += offset; + } + pstr->raw_mbs_idx = idx; + pstr->len -= offset; + pstr->stop -= offset; + + /* Then build the buffers. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + if (pstr->icase) + { + reg_errcode_t ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else + build_wcs_buffer (pstr); + } + else +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + { + if (pstr->icase) + build_upper_buffer (pstr); + else if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + else + pstr->valid_len = pstr->len; + + pstr->cur_idx = 0; + return REG_NOERROR; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_peek_byte_case (const re_string_t *pstr, int idx) +{ + int ch, off; + + /* Handle the common (easiest) cases first. */ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_peek_byte (pstr, idx); + +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1 + && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) + return re_string_peek_byte (pstr, idx); +#endif + + off = pstr->cur_idx + idx; +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + off = pstr->offsets[off]; +#endif + + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + +#ifdef RE_ENABLE_I18N + /* Ensure that e.g. 
for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I + this function returns CAPITAL LETTER I instead of first byte of + DOTLESS SMALL LETTER I. The latter would confuse the parser, + since peek_byte_case doesn't advance cur_idx in any way. */ + if (pstr->offsets_needed && !isascii (ch)) + return re_string_peek_byte (pstr, idx); +#endif + + return ch; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_fetch_byte_case (re_string_t *pstr) +{ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_fetch_byte (pstr); + +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + { + int off, ch; + + /* For tr_TR.UTF-8 [[:islower:]] there is + [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip + in that case the whole multi-byte character and return + the original letter. On the other side, with + [[: DOTLESS SMALL LETTER I return [[:I, as doing + anything else would complicate things too much. */ + + if (!re_string_first_byte (pstr, pstr->cur_idx)) + return re_string_fetch_byte (pstr); + + off = pstr->offsets[pstr->cur_idx]; + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + + if (! isascii (ch)) + return re_string_fetch_byte (pstr); + + re_string_skip_bytes (pstr, + re_string_char_size_at (pstr, pstr->cur_idx)); + return ch; + } +#endif + + return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; +} + +static void +internal_function +re_string_destruct (re_string_t *pstr) +{ +#ifdef RE_ENABLE_I18N + re_free (pstr->wcs); + re_free (pstr->offsets); +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + re_free (pstr->mbs); +} + +/* Return the context at IDX in INPUT. */ + +static unsigned int +internal_function +re_string_context_at (const re_string_t *input, int idx, int eflags) +{ + int c; + if (BE (idx < 0, 0)) + /* In this case, we use the value stored in input->tip_context, + since we can't know the character in input->mbs[-1] here. */ + return input->tip_context; + if (BE (idx == input->len, 0)) + return ((eflags & REG_NOTEOL) ? 
CONTEXT_ENDBUF + : CONTEXT_NEWLINE | CONTEXT_ENDBUF); +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc; + int wc_idx = idx; + while(input->wcs[wc_idx] == WEOF) + { +#ifdef DEBUG + /* It must not happen. */ + assert (wc_idx >= 0); +#endif + --wc_idx; + if (wc_idx < 0) + return input->tip_context; + } + wc = input->wcs[wc_idx]; + if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) + return CONTEXT_WORD; + return (IS_WIDE_NEWLINE (wc) && input->newline_anchor + ? CONTEXT_NEWLINE : 0); + } + else +#endif + { + c = re_string_byte_at (input, idx); + if (bitset_contain (input->word_char, c)) + return CONTEXT_WORD; + return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; + } +} + +/* Functions for set operation. */ + +static reg_errcode_t +internal_function +re_node_set_alloc (re_node_set *set, int size) +{ + set->alloc = size; + set->nelem = 0; + set->elems = re_malloc (int, size); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_1 (re_node_set *set, int elem) +{ + set->alloc = 1; + set->nelem = 1; + set->elems = re_malloc (int, 1); + if (BE (set->elems == NULL, 0)) + { + set->alloc = set->nelem = 0; + return REG_ESPACE; + } + set->elems[0] = elem; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_2 (re_node_set *set, int elem1, int elem2) +{ + set->alloc = 2; + set->elems = re_malloc (int, 2); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + if (elem1 == elem2) + { + set->nelem = 1; + set->elems[0] = elem1; + } + else + { + set->nelem = 2; + if (elem1 < elem2) + { + set->elems[0] = elem1; + set->elems[1] = elem2; + } + else + { + set->elems[0] = elem2; + set->elems[1] = elem1; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_copy (re_node_set *dest, const re_node_set *src) +{ + dest->nelem = src->nelem; + if (src->nelem > 0) + { + dest->alloc = 
dest->nelem; + dest->elems = re_malloc (int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + { + dest->alloc = dest->nelem = 0; + return REG_ESPACE; + } + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + } + else + re_node_set_init_empty (dest); + return REG_NOERROR; +} + +/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. + Note: We assume dest->elems is NULL, when dest->alloc is 0. */ + +static reg_errcode_t +internal_function +re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) +{ + int i1, i2, is, id, delta, sbase; + if (src1->nelem == 0 || src2->nelem == 0) + return REG_NOERROR; + + /* We need dest->nelem + 2 * elems_in_intersection; this is a + conservative estimate. */ + if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) + { + int new_alloc = src1->nelem + src2->nelem + dest->alloc; + int *new_elems = re_realloc (dest->elems, int, new_alloc); + if (BE (new_elems == NULL, 0)) + return REG_ESPACE; + dest->elems = new_elems; + dest->alloc = new_alloc; + } + + /* Find the items in the intersection of SRC1 and SRC2, and copy + into the top of DEST those that are not already in DEST itself. */ + sbase = dest->nelem + src1->nelem + src2->nelem; + i1 = src1->nelem - 1; + i2 = src2->nelem - 1; + id = dest->nelem - 1; + for (;;) + { + if (src1->elems[i1] == src2->elems[i2]) + { + /* Try to find the item in DEST. Maybe we could binary search? */ + while (id >= 0 && dest->elems[id] > src1->elems[i1]) + --id; + + if (id < 0 || dest->elems[id] != src1->elems[i1]) + dest->elems[--sbase] = src1->elems[i1]; + + if (--i1 < 0 || --i2 < 0) + break; + } + + /* Lower the highest of the two items. 
*/ + else if (src1->elems[i1] < src2->elems[i2]) + { + if (--i2 < 0) + break; + } + else + { + if (--i1 < 0) + break; + } + } + + id = dest->nelem - 1; + is = dest->nelem + src1->nelem + src2->nelem - 1; + delta = is - sbase + 1; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place; this is more or + less the same loop that is in re_node_set_merge. */ + dest->nelem += delta; + if (delta > 0 && id >= 0) + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + break; + } + } + + /* Copy remaining SRC elements. */ + memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int)); + + return REG_NOERROR; +} + +/* Calculate the union set of the sets SRC1 and SRC2. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/ + +static reg_errcode_t +internal_function +re_node_set_init_union (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) +{ + int i1, i2, id; /* Cursors into SRC1, SRC2 and DEST. */ + if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) + { + /* Both inputs are non-empty: reserve room for the worst case, + in which they share no element. */ + dest->alloc = src1->nelem + src2->nelem; + dest->elems = re_malloc (int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + return REG_ESPACE; + } + else + { + /* At most one input has elements (NULL counts as empty): the + union is a plain copy of it, or the empty set. */ + if (src1 != NULL && src1->nelem > 0) + return re_node_set_init_copy (dest, src1); + else if (src2 != NULL && src2->nelem > 0) + return re_node_set_init_copy (dest, src2); + else + re_node_set_init_empty (dest); + return REG_NOERROR; + } + /* Merge the two element arrays, always emitting the smaller head and + emitting an element present in both only once. This relies on both + arrays being in ascending order. */ + for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) + { + if (src1->elems[i1] > src2->elems[i2]) + { + dest->elems[id++] = src2->elems[i2++]; + continue; + } + if (src1->elems[i1] == src2->elems[i2]) + ++i2; + dest->elems[id++] = src1->elems[i1++]; + } + /* One input is exhausted; append whatever is left of the other. */ + if (i1 < src1->nelem) + { + memcpy (dest->elems + id, src1->elems + i1, + (src1->nelem - i1) * sizeof (int)); + id += src1->nelem - i1; + } + else if (i2 < src2->nelem) + { + memcpy (dest->elems + id, src2->elems + i2, + (src2->nelem - i2) * sizeof (int)); + id += src2->nelem - i2; + } + dest->nelem = id; + return REG_NOERROR; +} + +/* Calculate the union set of the sets DEST and SRC. And store it to + DEST. Return value indicates the error code or REG_NOERROR if succeeded. 
*/ + +static reg_errcode_t +internal_function +re_node_set_merge (re_node_set *dest, const re_node_set *src) +{ + int is, id, sbase, delta; + if (src == NULL || src->nelem == 0) + return REG_NOERROR; + if (dest->alloc < 2 * src->nelem + dest->nelem) + { + int new_alloc = 2 * (src->nelem + dest->alloc); + int *new_buffer = re_realloc (dest->elems, int, new_alloc); + if (BE (new_buffer == NULL, 0)) + return REG_ESPACE; + dest->elems = new_buffer; + dest->alloc = new_alloc; + } + + if (BE (dest->nelem == 0, 0)) + { + dest->nelem = src->nelem; + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + return REG_NOERROR; + } + + /* Copy into the top of DEST the items of SRC that are not + found in DEST. Maybe we could binary search in DEST? */ + for (sbase = dest->nelem + 2 * src->nelem, + is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; ) + { + if (dest->elems[id] == src->elems[is]) + is--, id--; + else if (dest->elems[id] < src->elems[is]) + dest->elems[--sbase] = src->elems[is--]; + else /* if (dest->elems[id] > src->elems[is]) */ + --id; + } + + if (is >= 0) + { + /* If DEST is exhausted, the remaining items of SRC must be unique. */ + sbase -= is + 1; + memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int)); + } + + id = dest->nelem - 1; + is = dest->nelem + 2 * src->nelem - 1; + delta = is - sbase + 1; + if (delta == 0) + return REG_NOERROR; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place. */ + dest->nelem += delta; + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + { + /* Copy remaining SRC elements. 
*/ + memcpy (dest->elems, dest->elems + sbase, + delta * sizeof (int)); + break; + } + } + } + + return REG_NOERROR; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have ELEM. + return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert (re_node_set *set, int elem) +{ + int idx; + /* In case the set is empty. */ + if (set->alloc == 0) + { + if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) + return 1; + else + return -1; + } + + if (BE (set->nelem, 0) == 0) + { + /* We already guaranteed above that set->alloc != 0. */ + set->elems[0] = elem; + ++set->nelem; + return 1; + } + + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = set->alloc * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Move the elements which follows the new element. Test the + first element separately to skip a check in the inner loop. */ + if (elem < set->elems[0]) + { + idx = 0; + for (idx = set->nelem; idx > 0; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + else + { + for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + + /* Insert the new element. */ + set->elems[idx] = elem; + ++set->nelem; + return 1; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have any element greater than or equal to ELEM. + Return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert_last (re_node_set *set, int elem) +{ + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = (set->alloc + 1) * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Insert the new element. 
*/ + set->elems[set->nelem++] = elem; + return 1; +} + +/* Compare two node sets SET1 and SET2. + return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_compare (const re_node_set *set1, const re_node_set *set2) +{ + int i; + if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) + return 0; + for (i = set1->nelem ; --i >= 0 ; ) + if (set1->elems[i] != set2->elems[i]) + return 0; + return 1; +} + +/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_contains (const re_node_set *set, int elem) +{ + unsigned int idx, right, mid; + if (set->nelem <= 0) + return 0; + + /* Binary search the element. */ + idx = 0; + right = set->nelem - 1; + while (idx < right) + { + mid = (idx + right) / 2; + if (set->elems[mid] < elem) + idx = mid + 1; + else + right = mid; + } + return set->elems[idx] == elem ? idx + 1 : 0; +} + +static void +internal_function +re_node_set_remove_at (re_node_set *set, int idx) +{ + if (idx < 0 || idx >= set->nelem) + return; + --set->nelem; + for (; idx < set->nelem; idx++) + set->elems[idx] = set->elems[idx + 1]; +} + + +/* Add the token TOKEN to dfa->nodes, and return the index of the token. + Or return -1, if an error will be occured. */ + +static int +internal_function +re_dfa_add_node (re_dfa_t *dfa, re_token_t token) +{ + int type = token.type; + if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) + { + size_t new_nodes_alloc = dfa->nodes_alloc * 2; + int *new_nexts, *new_indices; + re_node_set *new_edests, *new_eclosures; + re_token_t *new_nodes; + + /* Avoid overflows. 
*/ + if (BE (new_nodes_alloc < dfa->nodes_alloc, 0)) + return -1; + + new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); + if (BE (new_nodes == NULL, 0)) + return -1; + dfa->nodes = new_nodes; + new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); + new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); + new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); + new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); + if (BE (new_nexts == NULL || new_indices == NULL + || new_edests == NULL || new_eclosures == NULL, 0)) + return -1; + dfa->nexts = new_nexts; + dfa->org_indices = new_indices; + dfa->edests = new_edests; + dfa->eclosures = new_eclosures; + dfa->nodes_alloc = new_nodes_alloc; + } + dfa->nodes[dfa->nodes_len] = token; + dfa->nodes[dfa->nodes_len].constraint = 0; +#ifdef RE_ENABLE_I18N + dfa->nodes[dfa->nodes_len].accept_mb = + (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; +#endif + dfa->nexts[dfa->nodes_len] = -1; + re_node_set_init_empty (dfa->edests + dfa->nodes_len); + re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); + return dfa->nodes_len++; +} + +static inline unsigned int +internal_function +calc_state_hash (const re_node_set *nodes, unsigned int context) +{ + unsigned int hash = nodes->nelem + context; + int i; + for (i = 0 ; i < nodes->nelem ; i++) + hash += nodes->elems[i]; + return hash; +} + +/* Search for the state whose node_set is equivalent to NODES. + Return the pointer to the state, if we found it in the DFA. + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. 
*/ + +static re_dfastate_t * +internal_function +re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, + const re_node_set *nodes) +{ + unsigned int hash; + re_dfastate_t *new_state; + struct re_state_table_entry *spot; + int i; + /* An empty node set maps to the NULL state; this is not an error. */ + if (BE (nodes->nelem == 0, 0)) + { + *err = REG_NOERROR; + return NULL; + } + /* The hash is computed with context 0 and selects the bucket. */ + hash = calc_state_hash (nodes, 0); + spot = dfa->state_table + (hash & dfa->state_hash_mask); + + /* Look through the bucket, comparing node sets only when the full + hash values agree. */ + for (i = 0 ; i < spot->num ; i++) + { + re_dfastate_t *state = spot->array[i]; + if (hash != state->hash) + continue; + if (re_node_set_compare (&state->nodes, nodes)) + return state; + } + + /* There is no appropriate state in the dfa, create a new one. */ + new_state = create_ci_newstate (dfa, nodes, hash); + if (BE (new_state == NULL, 0)) + *err = REG_ESPACE; + + return new_state; +} + +/* Search for the state whose node_set is equivalent to NODES and + whose context is equivalent to CONTEXT. + Return the pointer to the state, if we found it in the DFA. + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. 
*/ + +static re_dfastate_t * +internal_function +re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, + const re_node_set *nodes, unsigned int context) +{ + unsigned int hash; + re_dfastate_t *new_state; + struct re_state_table_entry *spot; + int i; + /* An empty node set maps to the NULL state; this is not an error. */ + if (nodes->nelem == 0) + { + *err = REG_NOERROR; + return NULL; + } + /* CONTEXT takes part in the hash, so the same node set under different + contexts gets different hash values. */ + hash = calc_state_hash (nodes, context); + spot = dfa->state_table + (hash & dfa->state_hash_mask); + + /* A cached state matches when hash, context and the entrance node set + (not the possibly reduced STATE->NODES) all agree. */ + for (i = 0 ; i < spot->num ; i++) + { + re_dfastate_t *state = spot->array[i]; + if (state->hash == hash + && state->context == context + && re_node_set_compare (state->entrance_nodes, nodes)) + return state; + } + /* There is no appropriate state in `dfa', create a new one. */ + new_state = create_cd_newstate (dfa, nodes, context, hash); + if (BE (new_state == NULL, 0)) + *err = REG_ESPACE; + + return new_state; +} + +/* Finish initialization of the new state NEWSTATE, and using its hash value + HASH put in the appropriate bucket of DFA's state table. Return value + indicates the error code if failed. 
*/ + +static reg_errcode_t +register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, + unsigned int hash) +{ + struct re_state_table_entry *spot; + reg_errcode_t err; + int i; + + newstate->hash = hash; + /* NON_EPS_NODES gets room for every node of the state, so the + insertions below stay within that allocation. */ + err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + /* Collect the nodes of the state that are not epsilon nodes. */ + for (i = 0; i < newstate->nodes.nelem; i++) + { + int elem = newstate->nodes.elems[i]; + if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) + re_node_set_insert_last (&newstate->non_eps_nodes, elem); + } + + /* Append the state to its hash bucket, growing the bucket if full. */ + spot = dfa->state_table + (hash & dfa->state_hash_mask); + if (BE (spot->alloc <= spot->num, 0)) + { + int new_alloc = 2 * spot->num + 2; + re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *, + new_alloc); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + spot->array = new_array; + spot->alloc = new_alloc; + } + spot->array[spot->num++] = newstate; + return REG_NOERROR; +} + /* Free STATE together with the node sets and transition tables it holds. ENTRANCE_NODES is freed separately only when it does not alias STATE->NODES. */ +static void +free_state (re_dfastate_t *state) +{ + re_node_set_free (&state->non_eps_nodes); + re_node_set_free (&state->inveclosure); + if (state->entrance_nodes != &state->nodes) + { + re_node_set_free (state->entrance_nodes); + re_free (state->entrance_nodes); + } + re_node_set_free (&state->nodes); + re_free (state->word_trtable); + re_free (state->trtable); + re_free (state); +} + +/* Create the new state which is independent of contexts. + Return the new state if succeeded, otherwise return NULL. 
*/ + +static re_dfastate_t * +internal_function +create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int hash) +{ + int i; + reg_errcode_t err; + re_dfastate_t *newstate; + + /* calloc leaves every flag and pointer of the new state zeroed. */ + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + /* The entrance set is the node set itself here. */ + newstate->entrance_nodes = &newstate->nodes; + /* Derive the state flags from its nodes. */ + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + /* Plain characters without a constraint set no flag. */ + if (type == CHARACTER && !node->constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + else if (type == ANCHOR || node->constraint) + newstate->has_constraint = 1; + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} + +/* Create the new state which depends on the context CONTEXT. + Return the new state if succeeded, otherwise return NULL. 
*/ + +static re_dfastate_t * +internal_function +create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int context, unsigned int hash) +{ + int i, nctx_nodes = 0; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->context = context; + newstate->entrance_nodes = &newstate->nodes; + + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + if (type == CHARACTER && !constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + + if (constraint) + { + /* On the first constrained node, give the state its own copy + of the full entrance set; NEWSTATE->NODES is reduced below. */ + if (newstate->entrance_nodes == &newstate->nodes) + { + newstate->entrance_nodes = re_malloc (re_node_set, 1); + if (BE (newstate->entrance_nodes == NULL, 0)) + { + free_state (newstate); + return NULL; + } + /* A failed copy must not be ignored: the state would be + registered with an empty entrance set. */ + err = re_node_set_init_copy (newstate->entrance_nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + return NULL; + } + nctx_nodes = 0; + newstate->has_constraint = 1; + } + + /* Drop nodes whose constraint CONTEXT does not satisfy; + NCTX_NODES counts the removals so far so that the index + into the shrinking NEWSTATE->NODES stays right. */ + if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) + { + re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); + ++nctx_nodes; + } + } + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} diff --git a/gnu_regex/.svn/text-base/regex_internal.h.svn-base b/gnu_regex/.svn/text-base/regex_internal.h.svn-base new file mode 100644 index 0000000..71c4a38 --- /dev/null +++ b/gnu_regex/.svn/text-base/regex_internal.h.svn-base @@ -0,0 +1,773 @@ +/* Extended regular 
expression matching and search library. + Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_INTERNAL_H +#define _REGEX_INTERNAL_H 1 + +#include +#include +#include +#include +#include + +#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC +# include +#endif +#if defined HAVE_LOCALE_H || defined _LIBC +# include +#endif +#if defined HAVE_WCHAR_H || defined _LIBC +# include +#endif /* HAVE_WCHAR_H || _LIBC */ +#if defined HAVE_WCTYPE_H || defined _LIBC +# include +#endif /* HAVE_WCTYPE_H || _LIBC */ +#if defined HAVE_STDBOOL_H || defined _LIBC +# include +#endif /* HAVE_STDBOOL_H || _LIBC */ +#if defined HAVE_STDINT_H || defined _LIBC +# include +#endif /* HAVE_STDINT_H || _LIBC */ +#if defined _LIBC +# include +#else +# define __libc_lock_define(CLASS,NAME) +# define __libc_lock_init(NAME) do { } while (0) +# define __libc_lock_lock(NAME) do { } while (0) +# define __libc_lock_unlock(NAME) do { } while (0) +#endif + +/* In case that the system doesn't have isblank(). 
*/ +#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +# define isblank(ch) ((ch) == ' ' || (ch) == '\t') +#endif + +#ifdef _LIBC +# ifndef _RE_DEFINE_LOCALE_FUNCTIONS +# define _RE_DEFINE_LOCALE_FUNCTIONS 1 +# include +# include +# include +# endif +#endif + +/* This is for other GNU distributions with internationalized messages. */ +#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifdef _LIBC +# undef gettext +# define gettext(msgid) \ + INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) +# endif +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +# define gettext_noop(String) String +#endif + +/* For loser systems without the definition. */ +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC +# define RE_ENABLE_I18N +#endif + +#if __GNUC__ >= 3 +# define BE(expr, val) __builtin_expect (expr, val) +#else +# define BE(expr, val) (expr) +# define inline +#endif + +/* Number of single byte character. */ +#define SBC_MAX 256 + +#define COLL_ELEM_LEN_MAX 8 + +/* The character which represents newline. */ +#define NEWLINE_CHAR '\n' +#define WIDE_NEWLINE_CHAR L'\n' + +/* Rename to standard API for using out of glibc. */ +#ifndef _LIBC +# define __wctype wctype +# define __iswctype iswctype +# define __btowc btowc +# define __mbrtowc mbrtowc +# define __mempcpy mempcpy +# define __wcrtomb wcrtomb +# define __regfree regfree +# define attribute_hidden +#endif /* not _LIBC */ + +#ifdef __GNUC__ +# define __attribute(arg) __attribute__ (arg) +#else +# define __attribute(arg) +#endif + +extern const char __re_error_msgid[] attribute_hidden; +extern const size_t __re_error_msgid_idx[] attribute_hidden; + +/* An integer used to represent a set of bits. 
It must be unsigned, + and must be at least as wide as unsigned int. */ +typedef unsigned long int bitset_word_t; +/* All bits set in a bitset_word_t. */ +#define BITSET_WORD_MAX ULONG_MAX +/* Number of bits in a bitset_word_t. */ +#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) +/* Number of bitset_word_t in a bit_set. */ +#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) +typedef bitset_word_t bitset_t[BITSET_WORDS]; +typedef bitset_word_t *re_bitset_ptr_t; +typedef const bitset_word_t *re_const_bitset_ptr_t; + +#define bitset_set(set,i) \ + (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS) +#define bitset_clear(set,i) \ + (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_contain(set,i) \ + (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t)) +#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t)) +#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t)) + +#define PREV_WORD_CONSTRAINT 0x0001 +#define PREV_NOTWORD_CONSTRAINT 0x0002 +#define NEXT_WORD_CONSTRAINT 0x0004 +#define NEXT_NOTWORD_CONSTRAINT 0x0008 +#define PREV_NEWLINE_CONSTRAINT 0x0010 +#define NEXT_NEWLINE_CONSTRAINT 0x0020 +#define PREV_BEGBUF_CONSTRAINT 0x0040 +#define NEXT_ENDBUF_CONSTRAINT 0x0080 +#define WORD_DELIM_CONSTRAINT 0x0100 +#define NOT_WORD_DELIM_CONSTRAINT 0x0200 + +typedef enum +{ + INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + LINE_FIRST = PREV_NEWLINE_CONSTRAINT, + LINE_LAST = NEXT_NEWLINE_CONSTRAINT, + BUF_FIRST = PREV_BEGBUF_CONSTRAINT, + BUF_LAST = NEXT_ENDBUF_CONSTRAINT, + WORD_DELIM = WORD_DELIM_CONSTRAINT, + NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT +} re_context_type; + +typedef 
struct +{ + int alloc; + int nelem; + int *elems; +} re_node_set; + +typedef enum +{ + NON_TYPE = 0, + + /* Node type, These are used by token, node, tree. */ + CHARACTER = 1, + END_OF_RE = 2, + SIMPLE_BRACKET = 3, + OP_BACK_REF = 4, + OP_PERIOD = 5, +#ifdef RE_ENABLE_I18N + COMPLEX_BRACKET = 6, + OP_UTF8_PERIOD = 7, +#endif /* RE_ENABLE_I18N */ + + /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used + when the debugger shows values of this enum type. */ +#define EPSILON_BIT 8 + OP_OPEN_SUBEXP = EPSILON_BIT | 0, + OP_CLOSE_SUBEXP = EPSILON_BIT | 1, + OP_ALT = EPSILON_BIT | 2, + OP_DUP_ASTERISK = EPSILON_BIT | 3, + ANCHOR = EPSILON_BIT | 4, + + /* Tree type, these are used only by tree. */ + CONCAT = 16, + SUBEXP = 17, + + /* Token type, these are used only by token. */ + OP_DUP_PLUS = 18, + OP_DUP_QUESTION, + OP_OPEN_BRACKET, + OP_CLOSE_BRACKET, + OP_CHARSET_RANGE, + OP_OPEN_DUP_NUM, + OP_CLOSE_DUP_NUM, + OP_NON_MATCH_LIST, + OP_OPEN_COLL_ELEM, + OP_CLOSE_COLL_ELEM, + OP_OPEN_EQUIV_CLASS, + OP_CLOSE_EQUIV_CLASS, + OP_OPEN_CHAR_CLASS, + OP_CLOSE_CHAR_CLASS, + OP_WORD, + OP_NOTWORD, + OP_SPACE, + OP_NOTSPACE, + BACK_SLASH + +} re_token_type_t; + +#ifdef RE_ENABLE_I18N +typedef struct +{ + /* Multibyte characters. */ + wchar_t *mbchars; + + /* Collating symbols. */ +# ifdef _LIBC + int32_t *coll_syms; +# endif + + /* Equivalence classes. */ +# ifdef _LIBC + int32_t *equiv_classes; +# endif + + /* Range expressions. */ +# ifdef _LIBC + uint32_t *range_starts; + uint32_t *range_ends; +# else /* not _LIBC */ + wchar_t *range_starts; + wchar_t *range_ends; +# endif /* not _LIBC */ + + /* Character classes. */ + wctype_t *char_classes; + + /* If this character set is the non-matching list. */ + unsigned int non_match : 1; + + /* # of multibyte characters. */ + int nmbchars; + + /* # of collating symbols. */ + int ncoll_syms; + + /* # of equivalence classes. */ + int nequiv_classes; + + /* # of range expressions. 
*/ + int nranges; + + /* # of character classes. */ + int nchar_classes; +} re_charset_t; +#endif /* RE_ENABLE_I18N */ + +typedef struct +{ + union + { + unsigned char c; /* for CHARACTER */ + re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; /* for COMPLEX_BRACKET */ +#endif /* RE_ENABLE_I18N */ + int idx; /* for BACK_REF */ + re_context_type ctx_type; /* for ANCHOR */ + } opr; +#if __GNUC__ >= 2 + re_token_type_t type : 8; +#else + re_token_type_t type; +#endif + unsigned int constraint : 10; /* context constraint */ + unsigned int duplicated : 1; + unsigned int opt_subexp : 1; +#ifdef RE_ENABLE_I18N + unsigned int accept_mb : 1; + /* These 2 bits can be moved into the union if needed (e.g. if running out + of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ + unsigned int mb_partial : 1; +#endif + unsigned int word_char : 1; +} re_token_t; + +#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) + +struct re_string_t +{ + /* Indicate the raw buffer which is the original string passed as an + argument of regexec(), re_search(), etc.. */ + const unsigned char *raw_mbs; + /* Store the multibyte string. In case of "case insensitive mode" like + REG_ICASE, upper cases of the string are stored, otherwise MBS points + the same address that RAW_MBS points. */ + unsigned char *mbs; +#ifdef RE_ENABLE_I18N + /* Store the wide character string which is corresponding to MBS. */ + wint_t *wcs; + int *offsets; + mbstate_t cur_state; +#endif + /* Index in RAW_MBS. Each character mbs[i] corresponds to + raw_mbs[raw_mbs_idx + i]. */ + int raw_mbs_idx; + /* The length of the valid characters in the buffers. */ + int valid_len; + /* The corresponding number of bytes in raw_mbs array. */ + int valid_raw_len; + /* The length of the buffers MBS and WCS. */ + int bufs_len; + /* The index in MBS, which is updated by re_string_fetch_byte. */ + int cur_idx; + /* length of RAW_MBS array. 
*/ + int raw_len; + /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ + int len; + /* End of the buffer may be shorter than its length in the cases such + as re_match_2, re_search_2. Then, we use STOP for end of the buffer + instead of LEN. */ + int raw_stop; + /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ + int stop; + + /* The context of mbs[0]. We store the context independently, since + the context of mbs[0] may be different from raw_mbs[0], which is + the beginning of the input string. */ + unsigned int tip_context; + /* The translation passed as a part of an argument of re_compile_pattern. */ + RE_TRANSLATE_TYPE trans; + /* Copy of re_dfa_t's word_char. */ + re_const_bitset_ptr_t word_char; + /* 1 if REG_ICASE. */ + unsigned char icase; + unsigned char is_utf8; + unsigned char map_notascii; + unsigned char mbs_allocated; + unsigned char offsets_needed; + unsigned char newline_anchor; + unsigned char word_ops_used; + int mb_cur_max; +}; +typedef struct re_string_t re_string_t; + + +struct re_dfa_t; +typedef struct re_dfa_t re_dfa_t; + +#ifndef _LIBC +# ifdef __i386__ +# define internal_function __attribute ((regparm (3), stdcall)) +# else +# define internal_function +# endif +#endif + +#ifndef NOT_IN_libc +static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, + int new_buf_len) + internal_function; +# ifdef RE_ENABLE_I18N +static void build_wcs_buffer (re_string_t *pstr) internal_function; +static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr) + internal_function; +# endif /* RE_ENABLE_I18N */ +static void build_upper_buffer (re_string_t *pstr) internal_function; +static void re_string_translate_buffer (re_string_t *pstr) internal_function; +static unsigned int re_string_context_at (const re_string_t *input, int idx, + int eflags) + internal_function __attribute ((pure)); +#endif +#define re_string_peek_byte(pstr, offset) \ + ((pstr)->mbs[(pstr)->cur_idx + offset]) +#define re_string_fetch_byte(pstr) \ + 
((pstr)->mbs[(pstr)->cur_idx++]) +#define re_string_first_byte(pstr, idx) \ + ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF) +#define re_string_is_single_byte_char(pstr, idx) \ + ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \ + || (pstr)->wcs[(idx) + 1] != WEOF)) +#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) +#define re_string_cur_idx(pstr) ((pstr)->cur_idx) +#define re_string_get_buffer(pstr) ((pstr)->mbs) +#define re_string_length(pstr) ((pstr)->len) +#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) +#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) +#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) + +#ifdef WIN32 +# include +#else +# include +#endif + +#ifndef _LIBC +# if HAVE_ALLOCA +/* The OS usually guarantees only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + allocate anything larger than 4096 bytes. Also care for the possibility + of a few compiler-allocated temporary stack slots. */ +# define __libc_use_alloca(n) ((n) < 4032) +# else +/* alloca is implemented with malloc, so just use malloc. */ +# define __libc_use_alloca(n) 0 +# endif +#endif + +#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) +#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) +#define re_free(p) free (p) + +struct bin_tree_t +{ + struct bin_tree_t *parent; + struct bin_tree_t *left; + struct bin_tree_t *right; + struct bin_tree_t *first; + struct bin_tree_t *next; + + re_token_t token; + + /* `node_idx' is the index in dfa->nodes, if `type' == 0. + Otherwise `type' indicate the type of this node. 
*/ + int node_idx; +}; +typedef struct bin_tree_t bin_tree_t; + +#define BIN_TREE_STORAGE_SIZE \ + ((1024 - sizeof (void *)) / sizeof (bin_tree_t)) + +struct bin_tree_storage_t +{ + struct bin_tree_storage_t *next; + bin_tree_t data[BIN_TREE_STORAGE_SIZE]; +}; +typedef struct bin_tree_storage_t bin_tree_storage_t; + +#define CONTEXT_WORD 1 +#define CONTEXT_NEWLINE (CONTEXT_WORD << 1) +#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1) +#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1) + +#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD) +#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE) +#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF) +#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF) +#define IS_ORDINARY_CONTEXT(c) ((c) == 0) + +#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') +#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) +#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') +#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) + +#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ + ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ + || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ + || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\ + || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context))) + +#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \ + ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ + || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ + || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \ + || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context))) + +struct re_dfastate_t +{ + unsigned int hash; + re_node_set nodes; + re_node_set non_eps_nodes; + re_node_set inveclosure; + re_node_set *entrance_nodes; + struct re_dfastate_t **trtable, **word_trtable; + unsigned int context : 4; + unsigned int halt : 1; + /* If this state can accept 
`multi byte'. + Note that we refer to multibyte characters, and multi character + collating elements as `multi byte'. */ + unsigned int accept_mb : 1; + /* If this state has backreference node(s). */ + unsigned int has_backref : 1; + unsigned int has_constraint : 1; +}; +typedef struct re_dfastate_t re_dfastate_t; + +struct re_state_table_entry +{ + int num; + int alloc; + re_dfastate_t **array; +}; + +/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */ + +typedef struct +{ + int next_idx; + int alloc; + re_dfastate_t **array; +} state_array_t; + +/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */ + +typedef struct +{ + int node; + int str_idx; /* The position NODE match at. */ + state_array_t path; +} re_sub_match_last_t; + +/* Store information about the node NODE whose type is OP_OPEN_SUBEXP. + And information about the node, whose type is OP_CLOSE_SUBEXP, + corresponding to NODE is stored in LASTS. */ + +typedef struct +{ + int str_idx; + int node; + state_array_t *path; + int alasts; /* Allocation size of LASTS. */ + int nlasts; /* The number of LASTS. */ + re_sub_match_last_t **lasts; +} re_sub_match_top_t; + +struct re_backref_cache_entry +{ + int node; + int str_idx; + int subexp_from; + int subexp_to; + char more; + char unused; + unsigned short int eps_reachable_subexps_map; +}; + +typedef struct +{ + /* The string object corresponding to the input string. */ + re_string_t input; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + const re_dfa_t *const dfa; +#else + const re_dfa_t *dfa; +#endif + /* EFLAGS of the argument of regexec. */ + int eflags; + /* Where the matching ends. */ + int match_last; + int last_node; + /* The state log used by the matcher. */ + re_dfastate_t **state_log; + int state_log_top; + /* Back reference cache. 
*/ + int nbkref_ents; + int abkref_ents; + struct re_backref_cache_entry *bkref_ents; + int max_mb_elem_len; + int nsub_tops; + int asub_tops; + re_sub_match_top_t **sub_tops; +} re_match_context_t; + +typedef struct +{ + re_dfastate_t **sifted_states; + re_dfastate_t **limited_states; + int last_node; + int last_str_idx; + re_node_set limits; +} re_sift_context_t; + +struct re_fail_stack_ent_t +{ + int idx; + int node; + regmatch_t *regs; + re_node_set eps_via_nodes; +}; + +struct re_fail_stack_t +{ + int num; + int alloc; + struct re_fail_stack_ent_t *stack; +}; + +struct re_dfa_t +{ + re_token_t *nodes; + size_t nodes_alloc; + size_t nodes_len; + int *nexts; + int *org_indices; + re_node_set *edests; + re_node_set *eclosures; + re_node_set *inveclosures; + struct re_state_table_entry *state_table; + re_dfastate_t *init_state; + re_dfastate_t *init_state_word; + re_dfastate_t *init_state_nl; + re_dfastate_t *init_state_begbuf; + bin_tree_t *str_tree; + bin_tree_storage_t *str_tree_storage; + re_bitset_ptr_t sb_char; + int str_tree_storage_idx; + + /* number of subexpressions `re_nsub' is in regex_t. */ + unsigned int state_hash_mask; + int init_node; + int nbackref; /* The number of backreference in this dfa. */ + + /* Bitmap expressing which backreference is used. */ + bitset_word_t used_bkref_map; + bitset_word_t completed_bkref_map; + + unsigned int has_plural_match : 1; + /* If this dfa has "multibyte node", which is a backreference or + a node which can accept multibyte character or multi character + collating element. 
*/ + unsigned int has_mb_node : 1; + unsigned int is_utf8 : 1; + unsigned int map_notascii : 1; + unsigned int word_ops_used : 1; + int mb_cur_max; + bitset_t word_char; + reg_syntax_t syntax; + int *subexp_map; +#ifdef DEBUG + char* re_str; +#endif + __libc_lock_define (, lock) +}; + +#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) +#define re_node_set_remove(set,id) \ + (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) +#define re_node_set_empty(p) ((p)->nelem = 0) +#define re_node_set_free(set) re_free ((set)->elems) + + +typedef enum +{ + SB_CHAR, + MB_CHAR, + EQUIV_CLASS, + COLL_SYM, + CHAR_CLASS +} bracket_elem_type; + +typedef struct +{ + bracket_elem_type type; + union + { + unsigned char ch; + unsigned char *name; + wchar_t wch; + } opr; +} bracket_elem_t; + + +/* Inline functions for bitset operation. */ +static inline void +bitset_not (bitset_t set) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + set[bitset_i] = ~set[bitset_i]; +} + +static inline void +bitset_merge (bitset_t dest, const bitset_t src) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] |= src[bitset_i]; +} + +static inline void +bitset_mask (bitset_t dest, const bitset_t src) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] &= src[bitset_i]; +} + +#ifdef RE_ENABLE_I18N +/* Inline functions for re_string. 
*/
+/* Return the size, in bytes of the MBS/WCS buffers, of the character that
+   starts at index IDX of PSTR.  When mb_cur_max is 1 the answer is always
+   1.  Otherwise count forward from IDX + 1 until an entry of wcs[] that is
+   not WEOF is found or valid_len is reached (re_string_first_byte treats
+   a non-WEOF wcs[] entry as the first byte of a character).  */
+static inline int
+internal_function __attribute ((pure))
+re_string_char_size_at (const re_string_t *pstr, int idx)
+{
+  int byte_idx;
+  if (pstr->mb_cur_max == 1)
+    return 1;
+  /* Skip the positions that hold WEOF; stop at the next non-WEOF entry
+     or at the end of the valid part of the buffer.  */
+  for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
+    if (pstr->wcs[idx + byte_idx] != WEOF)
+      break;
+  return byte_idx;
+}
+
+/* Return the character at index IDX of PSTR as a wint_t: the byte mbs[IDX]
+   when mb_cur_max is 1, otherwise the entry wcs[IDX].  */
+static inline wint_t
+internal_function __attribute ((pure))
+re_string_wchar_at (const re_string_t *pstr, int idx)
+{
+  if (pstr->mb_cur_max == 1)
+    return (wint_t) pstr->mbs[idx];
+  return (wint_t) pstr->wcs[idx];
+}
+
+# ifndef NOT_IN_libc
+/* Return the size in bytes of the element starting at index IDX of PSTR.
+   Outside of _LIBC builds this is always 1.  In _LIBC builds, when the
+   LC_COLLATE locale reports a non-zero number of rules, findidx is called
+   on a pointer to mbs[IDX] and the distance the pointer advanced is
+   returned; with zero rules the result is 1.  */
+static int
+internal_function __attribute ((pure))
+re_string_elem_size_at (const re_string_t *pstr, int idx)
+{
+# ifdef _LIBC
+  const unsigned char *p, *extra;
+  const int32_t *table, *indirect;
+  int32_t tmp;
+  /* NOTE(review): the header name after `# include' is missing in this
+     copy of the patch; upstream glibc presumably includes
+     <locale/weight.h> (the definition of findidx) here -- confirm against
+     the original tarball.  */
+#  include
+  uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+  if (nrules != 0)
+    {
+      /* TABLE, EXTRA and INDIRECT are not read again in the visible code;
+         presumably findidx refers to them by name, so do not rename or
+         remove them without checking the included header.  */
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      extra = (const unsigned char *)
+        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                                _NL_COLLATE_INDIRECTMB);
+      p = pstr->mbs + idx;
+      /* Only the side effect on P is used; TMP itself is not read.  */
+      tmp = findidx (&p);
+      return p - pstr->mbs - idx;
+    }
+  else
+# endif /* _LIBC */
+    return 1;
+}
+# endif
+#endif /* RE_ENABLE_I18N */
+
+#endif /*  _REGEX_INTERNAL_H */
diff --git a/gnu_regex/.svn/text-base/regexec.c.svn-base b/gnu_regex/.svn/text-base/regexec.c.svn-base new file mode 100644 index 0000000..560921d --- /dev/null +++ b/gnu_regex/.svn/text-base/regexec.c.svn-base @@ -0,0 +1,4338 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2004, 2005, 2007, 2009 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa .
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, + int n) internal_function; +static void match_ctx_clean (re_match_context_t *mctx) internal_function; +static void match_ctx_free (re_match_context_t *cache) internal_function; +static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, + int str_idx, int from, int to) + internal_function; +static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) + internal_function; +static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, + int str_idx) internal_function; +static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, + int node, int str_idx) + internal_function; +static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, + int last_str_idx) + internal_function; +static reg_errcode_t re_search_internal (const regex_t *preg, + const char *string, int length, + int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags) internal_function; +static int re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, + int start, int 
range, struct re_registers *regs, + int stop, int ret_len) internal_function; +static int re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, struct re_registers *regs, + int ret_len) internal_function; +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, + int nregs, int regs_allocated) internal_function; +static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) + internal_function; +static int check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) internal_function; +static int check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) + internal_function; +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, + int cur_idx, int nmatch) internal_function; +static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, + int str_idx, int dest_node, int nregs, + regmatch_t *regs, + re_node_set *eps_via_nodes) + internal_function; +static reg_errcode_t set_regs (const regex_t *preg, + const re_match_context_t *mctx, + size_t nmatch, regmatch_t *pmatch, + int fl_backtrack) internal_function; +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) + internal_function; + +#ifdef RE_ENABLE_I18N +static int sift_states_iter_mb (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, + re_sift_context_t *sctx) + internal_function; +static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *cur_dest) + internal_function; +static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, + re_node_set *dest_nodes) + internal_function; 
+static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates) + internal_function; +static int check_dst_limits (const re_match_context_t *mctx, + re_node_set *limits, + int dst_node, int dst_idx, int src_node, + int src_idx) internal_function; +static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, + int boundaries, int subexp_idx, + int from_node, int bkref_idx) + internal_function; +static int check_dst_limits_calc_pos (const re_match_context_t *mctx, + int limit, int subexp_idx, + int node, int str_idx, + int bkref_idx) internal_function; +static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates, + re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, + int str_idx) internal_function; +static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) + internal_function; +static reg_errcode_t merge_state_array (const re_dfa_t *dfa, + re_dfastate_t **dst, + re_dfastate_t **src, int num) + internal_function; +static re_dfastate_t *find_recover_state (reg_errcode_t *err, + re_match_context_t *mctx) internal_function; +static re_dfastate_t *transit_state (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *state) internal_function; +static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *next_state) + internal_function; +static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, + re_node_set *cur_nodes, + int str_idx) internal_function; +#if 0 +static re_dfastate_t *transit_state_sb (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif +#ifdef RE_ENABLE_I18N +static reg_errcode_t transit_state_mb (re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif /* RE_ENABLE_I18N */ +static 
reg_errcode_t transit_state_bkref (re_match_context_t *mctx, + const re_node_set *nodes) + internal_function; +static reg_errcode_t get_subexp (re_match_context_t *mctx, + int bkref_node, int bkref_str_idx) + internal_function; +static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, + const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, + int bkref_node, int bkref_str) + internal_function; +static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) internal_function; +static reg_errcode_t check_arrival (re_match_context_t *mctx, + state_array_t *path, int top_node, + int top_str, int last_node, int last_str, + int type) internal_function; +static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, + int str_idx, + re_node_set *cur_nodes, + re_node_set *next_nodes) + internal_function; +static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, + re_node_set *cur_nodes, + int ex_subexp, int type) + internal_function; +static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, + re_node_set *dst_nodes, + int target, int ex_subexp, + int type) internal_function; +static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, + re_node_set *cur_nodes, int cur_str, + int subexp_num, int type) + internal_function; +static int build_trtable (const re_dfa_t *dfa, + re_dfastate_t *state) internal_function; +#ifdef RE_ENABLE_I18N +static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int idx) + internal_function; +# ifdef _LIBC +static unsigned int find_collation_sequence_value (const unsigned char *mbs, + size_t name_len) + internal_function; +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ +static int group_nodes_into_DFAstates (const re_dfa_t *dfa, + const re_dfastate_t *state, + re_node_set *states_node, + bitset_t *states_ch) internal_function; +static int check_node_accept (const re_match_context_t *mctx, + const 
re_token_t *node, int idx) + internal_function; +static reg_errcode_t extend_buffers (re_match_context_t *mctx) + internal_function; + +/* Entry point for POSIX code. */ + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. */ + +int +regexec (preg, string, nmatch, pmatch, eflags) + const regex_t *__restrict preg; + const char *__restrict string; + size_t nmatch; + regmatch_t pmatch[]; + int eflags; +{ + reg_errcode_t err; + int start, length; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + + if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) + return REG_BADPAT; + + if (eflags & REG_STARTEND) + { + start = pmatch[0].rm_so; + length = pmatch[0].rm_eo; + } + else + { + start = 0; + length = strlen (string); + } + + __libc_lock_lock (dfa->lock); + if (preg->no_sub) + err = re_search_internal (preg, string, length, start, length - start, + length, 0, NULL, eflags); + else + err = re_search_internal (preg, string, length, start, length - start, + length, nmatch, pmatch, eflags); + __libc_lock_unlock (dfa->lock); + return err != REG_NOERROR; +} + +#ifdef _LIBC +# include +versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); + +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +__typeof__ (__regexec) __compat_regexec; + +int +attribute_compat_text_section +__compat_regexec (const regex_t *__restrict preg, + const char *__restrict string, size_t nmatch, + regmatch_t pmatch[], int eflags) +{ + return regexec (preg, string, nmatch, pmatch, + eflags & 
(REG_NOTBOL | REG_NOTEOL)); +} +compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); +# endif +#endif + +/* Entry points for GNU code. */ + +/* re_match, re_search, re_match_2, re_search_2 + + The former two functions operate on STRING with length LENGTH, + while the later two operate on concatenation of STRING1 and STRING2 + with lengths LENGTH1 and LENGTH2, respectively. + + re_match() matches the compiled pattern in BUFP against the string, + starting at index START. + + re_search() first tries matching at index START, then it tries to match + starting from index START + 1, and so on. The last start position tried + is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same + way as re_match().) + + The parameter STOP of re_{match,search}_2 specifies that no match exceeding + the first STOP characters of the concatenation of the strings should be + concerned. + + If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match + and all groups is stroed in REGS. (For the "_2" variants, the offsets are + computed relative to the concatenation, not relative to the individual + strings.) + + On success, re_match* functions return the length of the match, re_search* + return the position of the start of the match. Return value -1 means no + match was found and -2 indicates an internal error. 
*/ + +int +re_match (bufp, string, length, start, regs) + struct re_pattern_buffer *bufp; + const char *string; + int length, start; + struct re_registers *regs; +{ + return re_search_stub (bufp, string, length, start, 0, length, regs, 1); +} +#ifdef _LIBC +weak_alias (__re_match, re_match) +#endif + +int +re_search (bufp, string, length, start, range, regs) + struct re_pattern_buffer *bufp; + const char *string; + int length, start, range; + struct re_registers *regs; +{ + return re_search_stub (bufp, string, length, start, range, length, regs, 0); +} +#ifdef _LIBC +weak_alias (__re_search, re_search) +#endif + +int +re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int length1, length2, start, stop; + struct re_registers *regs; +{ + return re_search_2_stub (bufp, string1, length1, string2, length2, + start, 0, regs, stop, 1); +} +#ifdef _LIBC +weak_alias (__re_match_2, re_match_2) +#endif + +int +re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int length1, length2, start, range, stop; + struct re_registers *regs; +{ + return re_search_2_stub (bufp, string1, length1, string2, length2, + start, range, regs, stop, 0); +} +#ifdef _LIBC +weak_alias (__re_search_2, re_search_2) +#endif + +static int +re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, + stop, ret_len) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int length1, length2, start, range, stop, ret_len; + struct re_registers *regs; +{ + const char *str; + int rval; + int len = length1 + length2; + int free_str = 0; + + if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) + return -2; + + /* Concatenate the strings. 
*/ + if (length2 > 0) + if (length1 > 0) + { + char *s = re_malloc (char, len); + + if (BE (s == NULL, 0)) + return -2; +#ifdef _LIBC + memcpy (__mempcpy (s, string1, length1), string2, length2); +#else + memcpy (s, string1, length1); + memcpy (s + length1, string2, length2); +#endif + str = s; + free_str = 1; + } + else + str = string2; + else + str = string1; + + rval = re_search_stub (bufp, str, len, start, range, stop, regs, + ret_len); + if (free_str) + re_free ((char *) str); + return rval; +} + +/* The parameters have the same meaning as those of re_search. + Additional parameters: + If RET_LEN is nonzero the length of the match is returned (re_match style); + otherwise the position of the match is returned. */ + +static int +re_search_stub (bufp, string, length, start, range, stop, regs, ret_len) + struct re_pattern_buffer *bufp; + const char *string; + int length, start, range, stop, ret_len; + struct re_registers *regs; +{ + reg_errcode_t result; + regmatch_t *pmatch; + int nregs, rval; + int eflags = 0; + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + + /* Check for out-of-range. */ + if (BE (start < 0 || start > length, 0)) + return -1; + if (BE (start + range > length, 0)) + range = length - start; + else if (BE (start + range < 0, 0)) + range = -start; + + __libc_lock_lock (dfa->lock); + + eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; + eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; + + /* Compile fastmap if we haven't yet. */ + if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate) + re_compile_fastmap (bufp); + + if (BE (bufp->no_sub, 0)) + regs = NULL; + + /* We need at least 1 register. */ + if (regs == NULL) + nregs = 1; + else if (BE (bufp->regs_allocated == REGS_FIXED && + regs->num_regs < bufp->re_nsub + 1, 0)) + { + nregs = regs->num_regs; + if (BE (nregs < 1, 0)) + { + /* Nothing can be copied to regs. 
*/ + regs = NULL; + nregs = 1; + } + } + else + nregs = bufp->re_nsub + 1; + pmatch = re_malloc (regmatch_t, nregs); + if (BE (pmatch == NULL, 0)) + { + rval = -2; + goto out; + } + + result = re_search_internal (bufp, string, length, start, range, stop, + nregs, pmatch, eflags); + + rval = 0; + + /* I hope we needn't fill ther regs with -1's when no match was found. */ + if (result != REG_NOERROR) + rval = -1; + else if (regs != NULL) + { + /* If caller wants register contents data back, copy them. */ + bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, + bufp->regs_allocated); + if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) + rval = -2; + } + + if (BE (rval == 0, 1)) + { + if (ret_len) + { + assert (pmatch[0].rm_so == start); + rval = pmatch[0].rm_eo - start; + } + else + rval = pmatch[0].rm_so; + } + re_free (pmatch); + out: + __libc_lock_unlock (dfa->lock); + return rval; +} + +static unsigned +re_copy_regs (regs, pmatch, nregs, regs_allocated) + struct re_registers *regs; + regmatch_t *pmatch; + int nregs, regs_allocated; +{ + int rval = REGS_REALLOCATE; + int i; + int need_regs = nregs + 1; + /* We need one extra element beyond `num_regs' for the `-1' marker GNU code + uses. */ + + /* Have the register data arrays been allocated? */ + if (regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. */ + regs->start = re_malloc (regoff_t, need_regs); + regs->end = re_malloc (regoff_t, need_regs); + if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0)) + return REGS_UNALLOCATED; + regs->num_regs = need_regs; + } + else if (regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. 
*/ + if (BE (need_regs > regs->num_regs, 0)) + { + regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); + regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs); + if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0)) + return REGS_UNALLOCATED; + regs->start = new_start; + regs->end = new_end; + regs->num_regs = need_regs; + } + } + else + { + assert (regs_allocated == REGS_FIXED); + /* This function may not be called with REGS_FIXED and nregs too big. */ + assert (regs->num_regs >= nregs); + rval = REGS_FIXED; + } + + /* Copy the regs. */ + for (i = 0; i < nregs; ++i) + { + regs->start[i] = pmatch[i].rm_so; + regs->end[i] = pmatch[i].rm_eo; + } + for ( ; i < regs->num_regs; ++i) + regs->start[i] = regs->end[i] = -1; + + return rval; +} + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ + +void +re_set_registers (bufp, regs, num_regs, starts, ends) + struct re_pattern_buffer *bufp; + struct re_registers *regs; + unsigned num_regs; + regoff_t *starts, *ends; +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t *) 0; + } +} +#ifdef _LIBC +weak_alias (__re_set_registers, re_set_registers) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. 
*/ + +#if defined _REGEX_RE_COMP || defined _LIBC +int +# ifdef _LIBC +weak_function +# endif +re_exec (s) + const char *s; +{ + return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); +} +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. */ + +/* Searches for a compiled pattern PREG in the string STRING, whose + length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same + mingings with regexec. START, and RANGE have the same meanings + with re_search. + Return REG_NOERROR if we find a match, and REG_NOMATCH if not, + otherwise return the error code. + Note: We assume front end functions already check ranges. + (START + RANGE >= 0 && START + RANGE <= LENGTH) */ + +static reg_errcode_t +re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, + eflags) + const regex_t *preg; + const char *string; + int length, start, range, stop, eflags; + size_t nmatch; + regmatch_t pmatch[]; +{ + reg_errcode_t err; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int left_lim, right_lim, incr; + int fl_longest_match, match_first, match_kind, match_last = -1; + int extra_nmatch; + int sb, ch; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + re_match_context_t mctx = { .dfa = dfa }; +#else + re_match_context_t mctx; +#endif + char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate + && range && !preg->can_be_null) ? preg->fastmap : NULL; + RE_TRANSLATE_TYPE t = preg->translate; + +#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) + memset (&mctx, '\0', sizeof (re_match_context_t)); + mctx.dfa = dfa; +#endif + + extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; + nmatch -= extra_nmatch; + + /* Check if the DFA haven't been compiled. 
*/ + if (BE (preg->used == 0 || dfa->init_state == NULL + || dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return REG_NOMATCH; + +#ifdef DEBUG + /* We assume front-end functions already check them. */ + assert (start + range >= 0 && start + range <= length); +#endif + + /* If initial states with non-begbuf contexts have no elements, + the regex must be anchored. If preg->newline_anchor is set, + we'll never use init_state_nl, so do not check it. */ + if (dfa->init_state->nodes.nelem == 0 + && dfa->init_state_word->nodes.nelem == 0 + && (dfa->init_state_nl->nodes.nelem == 0 + || !preg->newline_anchor)) + { + if (start != 0 && start + range != 0) + return REG_NOMATCH; + start = range = 0; + } + + /* We must check the longest matching, if nmatch > 0. */ + fl_longest_match = (nmatch != 0 || dfa->nbackref); + + err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, + preg->translate, preg->syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + mctx.input.stop = stop; + mctx.input.raw_stop = stop; + mctx.input.newline_anchor = preg->newline_anchor; + + err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* We will log all the DFA states through which the dfa pass, + if nmatch > 1, or this dfa has "multibyte node", which is a + back-reference or a node which can accept multibyte character or + multi character collating element. */ + if (nmatch > 1 || dfa->has_mb_node) + { + mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); + if (BE (mctx.state_log == NULL, 0)) + { + err = REG_ESPACE; + goto free_return; + } + } + else + mctx.state_log = NULL; + + match_first = start; + mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF; + + /* Check incrementally whether of not the input string match. */ + incr = (range < 0) ? 
-1 : 1; + left_lim = (range < 0) ? start + range : start; + right_lim = (range < 0) ? start : start + range; + sb = dfa->mb_cur_max == 1; + match_kind = + (fastmap + ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) + | (range >= 0 ? 2 : 0) + | (t != NULL ? 1 : 0)) + : 8); + + for (;; match_first += incr) + { + err = REG_NOMATCH; + if (match_first < left_lim || right_lim < match_first) + goto free_return; + + /* Advance as rapidly as possible through the string, until we + find a plausible place to start matching. This may be done + with varying efficiency, so there are various possibilities: + only the most common of them are specialized, in order to + save on code size. We use a switch statement for speed. */ + switch (match_kind) + { + case 8: + /* No fastmap. */ + break; + + case 7: + /* Fastmap with single-byte translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[t[(unsigned char) string[match_first]]]) + ++match_first; + goto forward_match_found_start_or_reached_end; + + case 6: + /* Fastmap without translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[(unsigned char) string[match_first]]) + ++match_first; + + forward_match_found_start_or_reached_end: + if (BE (match_first == right_lim, 0)) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (!fastmap[t ? t[ch] : ch]) + goto free_return; + } + break; + + case 4: + case 5: + /* Fastmap without multi-byte translation, match backwards. */ + while (match_first >= left_lim) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (fastmap[t ? t[ch] : ch]) + break; + --match_first; + } + if (match_first < left_lim) + goto free_return; + break; + + default: + /* In this case, we can't determine easily the current byte, + since it might be a component byte of a multibyte + character. Then we use the constructed buffer instead. 
*/ + for (;;) + { + /* If MATCH_FIRST is out of the valid range, reconstruct the + buffers. */ + unsigned int offset = match_first - mctx.input.raw_mbs_idx; + if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) + { + err = re_string_reconstruct (&mctx.input, match_first, + eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + offset = match_first - mctx.input.raw_mbs_idx; + } + /* If MATCH_FIRST is out of the buffer, leave it as '\0'. + Note that MATCH_FIRST must not be smaller than 0. */ + ch = (match_first >= length + ? 0 : re_string_byte_at (&mctx.input, offset)); + if (fastmap[ch]) + break; + match_first += incr; + if (match_first < left_lim || match_first > right_lim) + { + err = REG_NOMATCH; + goto free_return; + } + } + break; + } + + /* Reconstruct the buffers so that the matcher can assume that + the matching starts from the beginning of the buffer. */ + err = re_string_reconstruct (&mctx.input, match_first, eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + +#ifdef RE_ENABLE_I18N + /* Don't consider this char as a possible match start if it part, + yet isn't the head, of a multibyte character. */ + if (!sb && !re_string_first_byte (&mctx.input, 0)) + continue; +#endif + + /* It seems to be appropriate one, then use the matcher. */ + /* We assume that the matching starts from 0. */ + mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; + match_last = check_matching (&mctx, fl_longest_match, + range >= 0 ? 
&match_first : NULL); + if (match_last != -1) + { + if (BE (match_last == -2, 0)) + { + err = REG_ESPACE; + goto free_return; + } + else + { + mctx.match_last = match_last; + if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) + { + re_dfastate_t *pstate = mctx.state_log[match_last]; + mctx.last_node = check_halt_state_context (&mctx, pstate, + match_last); + } + if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) + || dfa->nbackref) + { + err = prune_impossible_nodes (&mctx); + if (err == REG_NOERROR) + break; + if (BE (err != REG_NOMATCH, 0)) + goto free_return; + match_last = -1; + } + else + break; /* We found a match. */ + } + } + + match_ctx_clean (&mctx); + } + +#ifdef DEBUG + assert (match_last != -1); + assert (err == REG_NOERROR); +#endif + + /* Set pmatch[] if we need. */ + if (nmatch > 0) + { + int reg_idx; + + /* Initialize registers. */ + for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) + pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; + + /* Set the points where matching start/end. */ + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = mctx.match_last; + + if (!preg->no_sub && nmatch > 1) + { + err = set_regs (preg, &mctx, nmatch, pmatch, + dfa->has_plural_match && dfa->nbackref > 0); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* At last, add the offset to the each registers, since we slided + the buffers so that we could assume that the matching starts + from 0. */ + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so != -1) + { +#ifdef RE_ENABLE_I18N + if (BE (mctx.input.offsets_needed != 0, 0)) + { + pmatch[reg_idx].rm_so = + (pmatch[reg_idx].rm_so == mctx.input.valid_len + ? mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_so]); + pmatch[reg_idx].rm_eo = + (pmatch[reg_idx].rm_eo == mctx.input.valid_len + ? 
mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_eo]); + } +#else + assert (mctx.input.offsets_needed == 0); +#endif + pmatch[reg_idx].rm_so += match_first; + pmatch[reg_idx].rm_eo += match_first; + } + for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) + { + pmatch[nmatch + reg_idx].rm_so = -1; + pmatch[nmatch + reg_idx].rm_eo = -1; + } + + if (dfa->subexp_map) + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) + if (dfa->subexp_map[reg_idx] != reg_idx) + { + pmatch[reg_idx + 1].rm_so + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; + pmatch[reg_idx + 1].rm_eo + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; + } + } + + free_return: + re_free (mctx.state_log); + if (dfa->nbackref) + match_ctx_free (&mctx); + re_string_destruct (&mctx.input); + return err; +} + +static reg_errcode_t +prune_impossible_nodes (mctx) + re_match_context_t *mctx; +{ + const re_dfa_t *const dfa = mctx->dfa; + int halt_node, match_last; + reg_errcode_t ret; + re_dfastate_t **sifted_states; + re_dfastate_t **lim_states = NULL; + re_sift_context_t sctx; +#ifdef DEBUG + assert (mctx->state_log != NULL); +#endif + match_last = mctx->match_last; + halt_node = mctx->last_node; + sifted_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (sifted_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + if (dfa->nbackref) + { + lim_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (lim_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + while (1) + { + memset (lim_states, '\0', + sizeof (re_dfastate_t *) * (match_last + 1)); + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, + match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] != NULL || lim_states[0] != NULL) + break; + do + { + --match_last; + if (match_last < 0) + { + ret = REG_NOMATCH; + goto free_return; + } + } while 
(mctx->state_log[match_last] == NULL + || !mctx->state_log[match_last]->halt); + halt_node = check_halt_state_context (mctx, + mctx->state_log[match_last], + match_last); + } + ret = merge_state_array (dfa, sifted_states, lim_states, + match_last + 1); + re_free (lim_states); + lim_states = NULL; + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + } + else + { + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] == NULL) + { + ret = REG_NOMATCH; + goto free_return; + } + } + re_free (mctx->state_log); + mctx->state_log = sifted_states; + sifted_states = NULL; + mctx->last_node = halt_node; + mctx->match_last = match_last; + ret = REG_NOERROR; + free_return: + re_free (sifted_states); + re_free (lim_states); + return ret; +} + +/* Acquire an initial state and return it. + We must select appropriate initial state depending on the context, + since initial states may have constraints like "\<", "^", etc.. */ + +static inline re_dfastate_t * +__attribute ((always_inline)) internal_function +acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, + int idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + if (dfa->init_state->has_constraint) + { + unsigned int context; + context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return dfa->init_state_word; + else if (IS_ORDINARY_CONTEXT (context)) + return dfa->init_state; + else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_begbuf; + else if (IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_nl; + else if (IS_BEGBUF_CONTEXT (context)) + { + /* It is relatively rare case, then calculate on demand. */ + return re_acquire_state_context (err, dfa, + dfa->init_state->entrance_nodes, + context); + } + else + /* Must not happen? 
*/ + return dfa->init_state; + } + else + return dfa->init_state; +} + +/* Check whether the regular expression match input string INPUT or not, + and return the index where the matching end, return -1 if not match, + or return -2 in case of an error. + FL_LONGEST_MATCH means we want the POSIX longest matching. + If P_MATCH_FIRST is not NULL, and the match fails, it is set to the + next place where we may want to try matching. + Note that the matcher assume that the maching starts from the current + index of the buffer. */ + +static int +internal_function +check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int match = 0; + int match_last = -1; + int cur_str_idx = re_string_cur_idx (&mctx->input); + re_dfastate_t *cur_state; + int at_init_state = p_match_first != NULL; + int next_start_idx = cur_str_idx; + + err = REG_NOERROR; + cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); + /* An initial state must not be NULL (invalid). */ + if (BE (cur_state == NULL, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + + if (mctx->state_log != NULL) + { + mctx->state_log[cur_str_idx] = cur_state; + + /* Check OP_OPEN_SUBEXP in the initial state in case that we use them + later. E.g. Processing back references. */ + if (BE (dfa->nbackref, 0)) + { + at_init_state = 0; + err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (cur_state->has_backref) + { + err = transit_state_bkref (mctx, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + + /* If the RE accepts NULL string. 
*/ + if (BE (cur_state->halt, 0)) + { + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, cur_str_idx)) + { + if (!fl_longest_match) + return cur_str_idx; + else + { + match_last = cur_str_idx; + match = 1; + } + } + } + + while (!re_string_eoi (&mctx->input)) + { + re_dfastate_t *old_state = cur_state; + int next_char_idx = re_string_cur_idx (&mctx->input) + 1; + + if (BE (next_char_idx >= mctx->input.bufs_len, 0) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + } + + cur_state = transit_state (&err, mctx, cur_state); + if (mctx->state_log != NULL) + cur_state = merge_state_with_log (&err, mctx, cur_state); + + if (cur_state == NULL) + { + /* Reached the invalid state or an error. Try to recover a valid + state using the state log, if available and if we have not + already found a valid (even if not the longest) match. */ + if (BE (err != REG_NOERROR, 0)) + return -2; + + if (mctx->state_log == NULL + || (match && !fl_longest_match) + || (cur_state = find_recover_state (&err, mctx)) == NULL) + break; + } + + if (BE (at_init_state, 0)) + { + if (old_state == cur_state) + next_start_idx = next_char_idx; + else + at_init_state = 0; + } + + if (cur_state->halt) + { + /* Reached a halt state. + Check the halt state can satisfy the current context. */ + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, + re_string_cur_idx (&mctx->input))) + { + /* We found an appropriate halt state. */ + match_last = re_string_cur_idx (&mctx->input); + match = 1; + + /* We found a match, do not modify match_first below. */ + p_match_first = NULL; + if (!fl_longest_match) + break; + } + } + } + + if (p_match_first) + *p_match_first += next_start_idx; + + return match_last; +} + +/* Check NODE match the current context. 
*/ + +static int +internal_function +check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) +{ + re_token_type_t type = dfa->nodes[node].type; + unsigned int constraint = dfa->nodes[node].constraint; + if (type != END_OF_RE) + return 0; + if (!constraint) + return 1; + if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) + return 0; + return 1; +} + +/* Check the halt state STATE match the current context. + Return 0 if not match, if the node, STATE has, is a halt node and + match the context, return the node. */ + +static int +internal_function +check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) +{ + int i; + unsigned int context; +#ifdef DEBUG + assert (state->halt); +#endif + context = re_string_context_at (&mctx->input, idx, mctx->eflags); + for (i = 0; i < state->nodes.nelem; ++i) + if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) + return state->nodes.elems[i]; + return 0; +} + +/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA + corresponding to the DFA). + Return the destination node, and update EPS_VIA_NODES, return -1 in case + of errors. */ + +static int +internal_function +proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, + int *pidx, int node, re_node_set *eps_via_nodes, + struct re_fail_stack_t *fs) +{ + const re_dfa_t *const dfa = mctx->dfa; + int i, err; + if (IS_EPSILON_NODE (dfa->nodes[node].type)) + { + re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; + re_node_set *edests = &dfa->edests[node]; + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + /* Pick up a valid destination, or return -1 if none is found. 
*/ + for (dest_node = -1, i = 0; i < edests->nelem; ++i) + { + int candidate = edests->elems[i]; + if (!re_node_set_contains (cur_nodes, candidate)) + continue; + if (dest_node == -1) + dest_node = candidate; + + else + { + /* In order to avoid infinite loop like "(a*)*", return the second + epsilon-transition if the first was already considered. */ + if (re_node_set_contains (eps_via_nodes, dest_node)) + return candidate; + + /* Otherwise, push the second epsilon-transition on the fail stack. */ + else if (fs != NULL + && push_fail_stack (fs, *pidx, candidate, nregs, regs, + eps_via_nodes)) + return -2; + + /* We know we are going to exit. */ + break; + } + } + return dest_node; + } + else + { + int naccepted = 0; + re_token_type_t type = dfa->nodes[node].type; + +#ifdef RE_ENABLE_I18N + if (dfa->nodes[node].accept_mb) + naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); + else +#endif /* RE_ENABLE_I18N */ + if (type == OP_BACK_REF) + { + int subexp_idx = dfa->nodes[node].opr.idx + 1; + naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; + if (fs != NULL) + { + if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) + return -1; + else if (naccepted) + { + char *buf = (char *) re_string_get_buffer (&mctx->input); + if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, + naccepted) != 0) + return -1; + } + } + + if (naccepted == 0) + { + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + dest_node = dfa->edests[node].elems[0]; + if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node)) + return dest_node; + } + } + + if (naccepted != 0 + || check_node_accept (mctx, dfa->nodes + node, *pidx)) + { + int dest_node = dfa->nexts[node]; + *pidx = (naccepted == 0) ? 
*pidx + 1 : *pidx + naccepted; + if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL + || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node))) + return -1; + re_node_set_empty (eps_via_nodes); + return dest_node; + } + } + return -1; +} + +static reg_errcode_t +internal_function +push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, + int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) +{ + reg_errcode_t err; + int num = fs->num++; + if (fs->num == fs->alloc) + { + struct re_fail_stack_ent_t *new_array; + new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) + * fs->alloc * 2)); + if (new_array == NULL) + return REG_ESPACE; + fs->alloc *= 2; + fs->stack = new_array; + } + fs->stack[num].idx = str_idx; + fs->stack[num].node = dest_node; + fs->stack[num].regs = re_malloc (regmatch_t, nregs); + if (fs->stack[num].regs == NULL) + return REG_ESPACE; + memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); + err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); + return err; +} + +static int +internal_function +pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, + regmatch_t *regs, re_node_set *eps_via_nodes) +{ + int num = --fs->num; + assert (num >= 0); + *pidx = fs->stack[num].idx; + memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); + re_node_set_free (eps_via_nodes); + re_free (fs->stack[num].regs); + *eps_via_nodes = fs->stack[num].eps_via_nodes; + return fs->stack[num].node; +} + +/* Set the positions where the subexpressions are starts/ends to registers + PMATCH. + Note: We assume that pmatch[0] is already set, and + pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. 
*/ + +static reg_errcode_t +internal_function +set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, + regmatch_t *pmatch, int fl_backtrack) +{ + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int idx, cur_node; + re_node_set eps_via_nodes; + struct re_fail_stack_t *fs; + struct re_fail_stack_t fs_body = { 0, 2, NULL }; + regmatch_t *prev_idx_match; + int prev_idx_match_malloced = 0; + +#ifdef DEBUG + assert (nmatch > 1); + assert (mctx->state_log != NULL); +#endif + if (fl_backtrack) + { + fs = &fs_body; + fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); + if (fs->stack == NULL) + return REG_ESPACE; + } + else + fs = NULL; + + cur_node = dfa->init_node; + re_node_set_init_empty (&eps_via_nodes); + + if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) + prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); + else + { + prev_idx_match = re_malloc (regmatch_t, nmatch); + if (prev_idx_match == NULL) + { + free_fail_stack_return (fs); + return REG_ESPACE; + } + prev_idx_match_malloced = 1; + } + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + + for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) + { + update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); + + if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) + { + int reg_idx; + if (fs) + { + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) + break; + if (reg_idx == nmatch) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); + } + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + } + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOERROR; + } + } + + /* Proceed to next node. 
*/ + cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, + &eps_via_nodes, fs); + + if (BE (cur_node < 0, 0)) + { + if (BE (cur_node == -2, 0)) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + free_fail_stack_return (fs); + return REG_ESPACE; + } + if (fs) + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOMATCH; + } + } + } + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); +} + +static reg_errcode_t +internal_function +free_fail_stack_return (struct re_fail_stack_t *fs) +{ + if (fs) + { + int fs_idx; + for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) + { + re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); + re_free (fs->stack[fs_idx].regs); + } + re_free (fs->stack); + } + return REG_NOERROR; +} + +static void +internal_function +update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) +{ + int type = dfa->nodes[cur_node].type; + if (type == OP_OPEN_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + + /* We are at the first node of this sub expression. */ + if (reg_num < nmatch) + { + pmatch[reg_num].rm_so = cur_idx; + pmatch[reg_num].rm_eo = -1; + } + } + else if (type == OP_CLOSE_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + if (reg_num < nmatch) + { + /* We are at the last node of this sub expression. */ + if (pmatch[reg_num].rm_so < cur_idx) + { + pmatch[reg_num].rm_eo = cur_idx; + /* This is a non-empty match or we are not inside an optional + subexpression. Accept this right away. 
*/ + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + } + else + { + if (dfa->nodes[cur_node].opt_subexp + && prev_idx_match[reg_num].rm_so != -1) + /* We transited through an empty match for an optional + subexpression, like (a?)*, and this is not the subexp's + first match. Copy back the old content of the registers + so that matches of an inner subexpression are undone as + well, like in ((a?))*. */ + memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); + else + /* We completed a subexpression, but it may be part of + an optional one, so do not update PREV_IDX_MATCH. */ + pmatch[reg_num].rm_eo = cur_idx; + } + } + } +} + +/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 + and sift the nodes in each states according to the following rules. + Updated state_log will be wrote to STATE_LOG. + + Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + 1. When STR_IDX == MATCH_LAST(the last index in the state_log): + If `a' isn't the LAST_NODE and `a' can't epsilon transit to + the LAST_NODE, we throw away the node `a'. + 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts + string `s' and transit to `b': + i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw + away the node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is + thrown away, we throw away the node `a'. + 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': + i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the + node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, + we throw away the node `a'. 
*/ + +#define STATE_NODE_CONTAINS(state,node) \ + ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) + +static reg_errcode_t +internal_function +sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) +{ + reg_errcode_t err; + int null_cnt = 0; + int str_idx = sctx->last_str_idx; + re_node_set cur_dest; + +#ifdef DEBUG + assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); +#endif + + /* Build sifted state_log[str_idx]. It has the nodes which can epsilon + transit to the last_node and the last_node itself. */ + err = re_node_set_init_1 (&cur_dest, sctx->last_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* Then check each states in the state_log. */ + while (str_idx > 0) + { + /* Update counters. */ + null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; + if (null_cnt > mctx->max_mb_elem_len) + { + memset (sctx->sifted_states, '\0', + sizeof (re_dfastate_t *) * str_idx); + re_node_set_free (&cur_dest); + return REG_NOERROR; + } + re_node_set_empty (&cur_dest); + --str_idx; + + if (mctx->state_log[str_idx]) + { + err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* Add all the nodes which satisfy the following conditions: + - It can epsilon transit to a node in CUR_DEST. + - It is in CUR_SRC. + And update state_log. 
*/ + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + err = REG_NOERROR; + free_return: + re_node_set_free (&cur_dest); + return err; +} + +static reg_errcode_t +internal_function +build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, re_node_set *cur_dest) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; + int i; + + /* Then build the next sifted state. + We build the next sifted state on `cur_dest', and update + `sifted_states[str_idx]' with `cur_dest'. + Note: + `cur_dest' is the sifted state from `state_log[str_idx + 1]'. + `cur_src' points the node_set of the old `state_log[str_idx]' + (with the epsilon nodes pre-filtered out). */ + for (i = 0; i < cur_src->nelem; i++) + { + int prev_node = cur_src->elems[i]; + int naccepted = 0; + int ret; + +#ifdef DEBUG + re_token_type_t type = dfa->nodes[prev_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[prev_node].accept_mb) + naccepted = sift_states_iter_mb (mctx, sctx, prev_node, + str_idx, sctx->last_str_idx); +#endif /* RE_ENABLE_I18N */ + + /* We don't check backreferences here. + See update_cur_sifted_state(). */ + if (!naccepted + && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) + && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], + dfa->nexts[prev_node])) + naccepted = 1; + + if (naccepted == 0) + continue; + + if (sctx->limits.nelem) + { + int to_idx = str_idx + naccepted; + if (check_dst_limits (mctx, &sctx->limits, + dfa->nexts[prev_node], to_idx, + prev_node, str_idx)) + continue; + } + ret = re_node_set_insert (cur_dest, prev_node); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + + return REG_NOERROR; +} + +/* Helper functions. 
*/ + +static reg_errcode_t +internal_function +clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) +{ + int top = mctx->state_log_top; + + if (next_state_log_idx >= mctx->input.bufs_len + || (next_state_log_idx >= mctx->input.valid_len + && mctx->input.valid_len < mctx->input.len)) + { + reg_errcode_t err; + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (top < next_state_log_idx) + { + memset (mctx->state_log + top + 1, '\0', + sizeof (re_dfastate_t *) * (next_state_log_idx - top)); + mctx->state_log_top = next_state_log_idx; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, + re_dfastate_t **src, int num) +{ + int st_idx; + reg_errcode_t err; + for (st_idx = 0; st_idx < num; ++st_idx) + { + if (dst[st_idx] == NULL) + dst[st_idx] = src[st_idx]; + else if (src[st_idx] != NULL) + { + re_node_set merged_set; + err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, + &src[st_idx]->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); + re_node_set_free (&merged_set); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *dest_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + const re_node_set *candidates; + candidates = ((mctx->state_log[str_idx] == NULL) ? NULL + : &mctx->state_log[str_idx]->nodes); + + if (dest_nodes->nelem == 0) + sctx->sifted_states[str_idx] = NULL; + else + { + if (candidates) + { + /* At first, add the nodes which can epsilon transit to a node in + DEST_NODE. 
*/ + err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Then, check the limitations in the current sift_context. */ + if (sctx->limits.nelem) + { + err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, + mctx->bkref_ents, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + + sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (candidates && mctx->state_log[str_idx]->has_backref) + { + err = sift_states_bkref (mctx, sctx, str_idx, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + reg_errcode_t err = REG_NOERROR; + int i; + + re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (!state->inveclosure.alloc) + { + err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + for (i = 0; i < dest_nodes->nelem; i++) + re_node_set_merge (&state->inveclosure, + dfa->inveclosures + dest_nodes->elems[i]); + } + return re_node_set_add_intersect (dest_nodes, candidates, + &state->inveclosure); +} + +static reg_errcode_t +internal_function +sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + int ecl_idx; + reg_errcode_t err; + re_node_set *inv_eclosure = dfa->inveclosures + node; + re_node_set except_nodes; + re_node_set_init_empty (&except_nodes); + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (cur_node == node) + continue; + if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) + { + int edst1 = dfa->edests[cur_node].elems[0]; + int edst2 = 
((dfa->edests[cur_node].nelem > 1) + ? dfa->edests[cur_node].elems[1] : -1); + if ((!re_node_set_contains (inv_eclosure, edst1) + && re_node_set_contains (dest_nodes, edst1)) + || (edst2 > 0 + && !re_node_set_contains (inv_eclosure, edst2) + && re_node_set_contains (dest_nodes, edst2))) + { + err = re_node_set_add_intersect (&except_nodes, candidates, + dfa->inveclosures + cur_node); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&except_nodes); + return err; + } + } + } + } + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (!re_node_set_contains (&except_nodes, cur_node)) + { + int idx = re_node_set_contains (dest_nodes, cur_node) - 1; + re_node_set_remove_at (dest_nodes, idx); + } + } + re_node_set_free (&except_nodes); + return REG_NOERROR; +} + +static int +internal_function +check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, + int dst_node, int dst_idx, int src_node, int src_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int lim_idx, src_pos, dst_pos; + + int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); + int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = mctx->bkref_ents + limits->elems[lim_idx]; + subexp_idx = dfa->nodes[ent->node].opr.idx; + + dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, dst_node, dst_idx, + dst_bkref_idx); + src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, src_node, src_idx, + src_bkref_idx); + + /* In case of: + ( ) + ( ) + ( ) */ + if (src_pos == dst_pos) + continue; /* This is unrelated limitation. 
*/ + else + return 1; + } + return 0; +} + +static int +internal_function +check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, + int subexp_idx, int from_node, int bkref_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *eclosures = dfa->eclosures + from_node; + int node_idx; + + /* Else, we are on the boundary: examine the nodes on the epsilon + closure. */ + for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) + { + int node = eclosures->elems[node_idx]; + switch (dfa->nodes[node].type) + { + case OP_BACK_REF: + if (bkref_idx != -1) + { + struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; + do + { + int dst, cpos; + + if (ent->node != node) + continue; + + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word_t) 1 << subexp_idx))) + continue; + + /* Recurse trying to reach the OP_OPEN_SUBEXP and + OP_CLOSE_SUBEXP cases below. But, if the + destination node is the same node as the source + node, don't recurse because it would cause an + infinite loop: a regex that exhibits this behavior + is ()\1*\1* */ + dst = dfa->edests[node].elems[0]; + if (dst == from_node) + { + if (boundaries & 1) + return -1; + else /* if (boundaries & 2) */ + return 0; + } + + cpos = + check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + dst, bkref_idx); + if (cpos == -1 /* && (boundaries & 1) */) + return -1; + if (cpos == 0 && (boundaries & 2)) + return 0; + + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map + &= ~((bitset_word_t) 1 << subexp_idx); + } + while (ent++->more); + } + break; + + case OP_OPEN_SUBEXP: + if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) + return -1; + break; + + case OP_CLOSE_SUBEXP: + if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) + return 0; + break; + + default: + break; + } + } + + return (boundaries & 2) ? 
1 : 0; +} + +static int +internal_function +check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, + int subexp_idx, int from_node, int str_idx, + int bkref_idx) +{ + struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; + int boundaries; + + /* If we are outside the range of the subexpression, return -1 or 1. */ + if (str_idx < lim->subexp_from) + return -1; + + if (lim->subexp_to < str_idx) + return 1; + + /* If we are within the subexpression, return 0. */ + boundaries = (str_idx == lim->subexp_from); + boundaries |= (str_idx == lim->subexp_to) << 1; + if (boundaries == 0) + return 0; + + /* Else, examine epsilon closure. */ + return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + from_node, bkref_idx); +} + +/* Check the limitations of sub expressions LIMITS, and remove the nodes + which are against limitations from DEST_NODES. */ + +static reg_errcode_t +internal_function +check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates, re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, int str_idx) +{ + reg_errcode_t err; + int node_idx, lim_idx; + + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = bkref_ents + limits->elems[lim_idx]; + + if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) + continue; /* This is unrelated limitation. */ + + subexp_idx = dfa->nodes[ent->node].opr.idx; + if (ent->subexp_to == str_idx) + { + int ops_node = -1; + int cls_node = -1; + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_OPEN_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + ops_node = node; + else if (type == OP_CLOSE_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + cls_node = node; + } + + /* Check the limitation of the open subexpression. 
*/ + /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ + if (ops_node >= 0) + { + err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Check the limitation of the close subexpression. */ + if (cls_node >= 0) + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + if (!re_node_set_contains (dfa->inveclosures + node, + cls_node) + && !re_node_set_contains (dfa->eclosures + node, + cls_node)) + { + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + --node_idx; + } + } + } + else /* (ent->subexp_to != str_idx) */ + { + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) + { + if (subexp_idx != dfa->nodes[node].opr.idx) + continue; + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int node_idx, node; + re_sift_context_t local_sctx; + int first_idx = search_cur_bkref_entry (mctx, str_idx); + + if (first_idx == -1) + return REG_NOERROR; + + local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. 
*/ + + for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) + { + int enabled_idx; + re_token_type_t type; + struct re_backref_cache_entry *entry; + node = candidates->elems[node_idx]; + type = dfa->nodes[node].type; + /* Avoid infinite loop for the REs like "()\1+". */ + if (node == sctx->last_node && str_idx == sctx->last_str_idx) + continue; + if (type != OP_BACK_REF) + continue; + + entry = mctx->bkref_ents + first_idx; + enabled_idx = first_idx; + do + { + int subexp_len; + int to_idx; + int dst_node; + int ret; + re_dfastate_t *cur_state; + + if (entry->node != node) + continue; + subexp_len = entry->subexp_to - entry->subexp_from; + to_idx = str_idx + subexp_len; + dst_node = (subexp_len ? dfa->nexts[node] + : dfa->edests[node].elems[0]); + + if (to_idx > sctx->last_str_idx + || sctx->sifted_states[to_idx] == NULL + || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) + || check_dst_limits (mctx, &sctx->limits, node, + str_idx, dst_node, to_idx)) + continue; + + if (local_sctx.sifted_states == NULL) + { + local_sctx = *sctx; + err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.last_node = node; + local_sctx.last_str_idx = str_idx; + ret = re_node_set_insert (&local_sctx.limits, enabled_idx); + if (BE (ret < 0, 0)) + { + err = REG_ESPACE; + goto free_return; + } + cur_state = local_sctx.sifted_states[str_idx]; + err = sift_states_backward (mctx, &local_sctx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + if (sctx->limited_states != NULL) + { + err = merge_state_array (dfa, sctx->limited_states, + local_sctx.sifted_states, + str_idx + 1); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.sifted_states[str_idx] = cur_state; + re_node_set_remove (&local_sctx.limits, enabled_idx); + + /* mctx->bkref_ents may have changed, reload the pointer. 
*/ + entry = mctx->bkref_ents + enabled_idx; + } + while (enabled_idx++, entry++->more); + } + err = REG_NOERROR; + free_return: + if (local_sctx.sifted_states != NULL) + { + re_node_set_free (&local_sctx.limits); + } + + return err; +} + + +#ifdef RE_ENABLE_I18N +static int +internal_function +sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int naccepted; + /* Check the node can accept `multi byte'. */ + naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); + if (naccepted > 0 && str_idx + naccepted <= max_str_idx && + !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], + dfa->nexts[node_idx])) + /* The node can't accept the `multi byte', or the + destination was already thrown away, then the node + could't accept the current input `multi byte'. */ + naccepted = 0; + /* Otherwise, it is sure that the node could accept + `naccepted' bytes input. */ + return naccepted; +} +#endif /* RE_ENABLE_I18N */ + + +/* Functions for state transition. */ + +/* Return the next state to which the current state STATE will transit by + accepting the current input byte, and update STATE_LOG if necessary. + If STATE can accept a multibyte char/collating element/back reference + update the destination of STATE_LOG. */ + +static re_dfastate_t * +internal_function +transit_state (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + re_dfastate_t **trtable; + unsigned char ch; + +#ifdef RE_ENABLE_I18N + /* If the current state can accept multibyte. */ + if (BE (state->accept_mb, 0)) + { + *err = transit_state_mb (mctx, state); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } +#endif /* RE_ENABLE_I18N */ + + /* Then decide the next state with the single byte. 
*/ +#if 0 + if (0) + /* don't use transition table */ + return transit_state_sb (err, mctx, state); +#endif + + /* Use transition table */ + ch = re_string_fetch_byte (&mctx->input); + for (;;) + { + trtable = state->trtable; + if (BE (trtable != NULL, 1)) + return trtable[ch]; + + trtable = state->word_trtable; + if (BE (trtable != NULL, 1)) + { + unsigned int context; + context + = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return trtable[ch + SBC_MAX]; + else + return trtable[ch]; + } + + if (!build_trtable (mctx->dfa, state)) + { + *err = REG_ESPACE; + return NULL; + } + + /* Retry, we now have a transition table. */ + } +} + +/* Update the state_log if we need */ +re_dfastate_t * +internal_function +merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *next_state) +{ + const re_dfa_t *const dfa = mctx->dfa; + int cur_idx = re_string_cur_idx (&mctx->input); + + if (cur_idx > mctx->state_log_top) + { + mctx->state_log[cur_idx] = next_state; + mctx->state_log_top = cur_idx; + } + else if (mctx->state_log[cur_idx] == 0) + { + mctx->state_log[cur_idx] = next_state; + } + else + { + re_dfastate_t *pstate; + unsigned int context; + re_node_set next_nodes, *log_nodes, *table_nodes = NULL; + /* If (state_log[cur_idx] != 0), it implies that cur_idx is + the destination of a multibyte char/collating element/ + back reference. Then the next state is the union set of + these destinations and the results of the transition table. */ + pstate = mctx->state_log[cur_idx]; + log_nodes = pstate->entrance_nodes; + if (next_state != NULL) + { + table_nodes = next_state->entrance_nodes; + *err = re_node_set_init_union (&next_nodes, table_nodes, + log_nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + else + next_nodes = *log_nodes; + /* Note: We already add the nodes of the initial state, + then we don't need to add them here. 
*/ + + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + next_state = mctx->state_log[cur_idx] + = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + + if (table_nodes != NULL) + re_node_set_free (&next_nodes); + } + + if (BE (dfa->nbackref, 0) && next_state != NULL) + { + /* Check OP_OPEN_SUBEXP in the current state in case that we use them + later. We must check them here, since the back references in the + next state might use them. */ + *err = check_subexp_matching_top (mctx, &next_state->nodes, + cur_idx); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + + /* If the next state has back references. */ + if (next_state->has_backref) + { + *err = transit_state_bkref (mctx, &next_state->nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + next_state = mctx->state_log[cur_idx]; + } + } + + return next_state; +} + +/* Skip bytes in the input that correspond to part of a + multi-byte match, then look in the log for a state + from which to restart matching. */ +re_dfastate_t * +internal_function +find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) +{ + re_dfastate_t *cur_state; + do + { + int max = mctx->state_log_top; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + do + { + if (++cur_str_idx > max) + return NULL; + re_string_skip_bytes (&mctx->input, 1); + } + while (mctx->state_log[cur_str_idx] == NULL); + + cur_state = merge_state_with_log (err, mctx, NULL); + } + while (*err == REG_NOERROR && cur_state == NULL); + return cur_state; +} + +/* Helper functions for transit_state. */ + +/* From the node set CUR_NODES, pick up the nodes whose types are + OP_OPEN_SUBEXP and which have corresponding back references in the regular + expression. And register them to use them later for evaluating the + correspoding back references. 
*/ + +static reg_errcode_t +internal_function +check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, + int str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int node_idx; + reg_errcode_t err; + + /* TODO: This isn't efficient. + Because there might be more than one nodes whose types are + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) + { + int node = cur_nodes->elems[node_idx]; + if (dfa->nodes[node].type == OP_OPEN_SUBEXP + && dfa->nodes[node].opr.idx < BITSET_WORD_BITS + && (dfa->used_bkref_map + & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) + { + err = match_ctx_add_subtop (mctx, node, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +#if 0 +/* Return the next state to which the current state STATE will transit by + accepting the current input byte. */ + +static re_dfastate_t * +transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + const re_dfa_t *const dfa = mctx->dfa; + re_node_set next_nodes; + re_dfastate_t *next_state; + int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); + unsigned int context; + + *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) + { + int cur_node = state->nodes.elems[node_cnt]; + if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) + { + *err = re_node_set_merge (&next_nodes, + dfa->eclosures + dfa->nexts[cur_node]); + if (BE (*err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return NULL; + } + } + } + context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); + next_state = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR 
is already set. */ + + re_node_set_free (&next_nodes); + re_string_skip_bytes (&mctx->input, 1); + return next_state; +} +#endif + +#ifdef RE_ENABLE_I18N +static reg_errcode_t +internal_function +transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + + for (i = 0; i < pstate->nodes.nelem; ++i) + { + re_node_set dest_nodes, *new_nodes; + int cur_node_idx = pstate->nodes.elems[i]; + int naccepted, dest_idx; + unsigned int context; + re_dfastate_t *dest_state; + + if (!dfa->nodes[cur_node_idx].accept_mb) + continue; + + if (dfa->nodes[cur_node_idx].constraint) + { + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input), + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, + context)) + continue; + } + + /* How many bytes the node can accept? */ + naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, + re_string_cur_idx (&mctx->input)); + if (naccepted == 0) + continue; + + /* The node can accepts `naccepted' bytes. */ + dest_idx = re_string_cur_idx (&mctx->input) + naccepted; + mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? 
naccepted + : mctx->max_mb_elem_len); + err = clean_state_log_if_needed (mctx, dest_idx); + if (BE (err != REG_NOERROR, 0)) + return err; +#ifdef DEBUG + assert (dfa->nexts[cur_node_idx] != -1); +#endif + new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; + + dest_state = mctx->state_log[dest_idx]; + if (dest_state == NULL) + dest_nodes = *new_nodes; + else + { + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, new_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + context = re_string_context_at (&mctx->input, dest_idx - 1, + mctx->eflags); + mctx->state_log[dest_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + if (dest_state != NULL) + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} +#endif /* RE_ENABLE_I18N */ + +static reg_errcode_t +internal_function +transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + for (i = 0; i < nodes->nelem; ++i) + { + int dest_str_idx, prev_nelem, bkc_idx; + int node_idx = nodes->elems[i]; + unsigned int context; + const re_token_t *node = dfa->nodes + node_idx; + re_node_set *new_dest_nodes; + + /* Check whether `node' is a backreference or not. */ + if (node->type != OP_BACK_REF) + continue; + + if (node->constraint) + { + context = re_string_context_at (&mctx->input, cur_str_idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + continue; + } + + /* `node' is a backreference. + Check the substring which the substring matched. */ + bkc_idx = mctx->nbkref_ents; + err = get_subexp (mctx, node_idx, cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* And add the epsilon closures (which is `new_dest_nodes') of + the backreference to appropriate state_log. 
*/ +#ifdef DEBUG + assert (dfa->nexts[node_idx] != -1); +#endif + for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) + { + int subexp_len; + re_dfastate_t *dest_state; + struct re_backref_cache_entry *bkref_ent; + bkref_ent = mctx->bkref_ents + bkc_idx; + if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) + continue; + subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; + new_dest_nodes = (subexp_len == 0 + ? dfa->eclosures + dfa->edests[node_idx].elems[0] + : dfa->eclosures + dfa->nexts[node_idx]); + dest_str_idx = (cur_str_idx + bkref_ent->subexp_to + - bkref_ent->subexp_from); + context = re_string_context_at (&mctx->input, dest_str_idx - 1, + mctx->eflags); + dest_state = mctx->state_log[dest_str_idx]; + prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 + : mctx->state_log[cur_str_idx]->nodes.nelem); + /* Add `new_dest_node' to state_log. */ + if (dest_state == NULL) + { + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, new_dest_nodes, + context); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + else + { + re_node_set dest_nodes; + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, + new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&dest_nodes); + goto free_return; + } + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + /* We need to check recursively if the backreference can epsilon + transit. 
*/ + if (subexp_len == 0 + && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) + { + err = check_subexp_matching_top (mctx, new_dest_nodes, + cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + err = transit_state_bkref (mctx, new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + } + } + err = REG_NOERROR; + free_return: + return err; +} + +/* Enumerate all the candidates which the backreference BKREF_NODE can match + at BKREF_STR_IDX, and register them by match_ctx_add_entry(). + Note that we might collect inappropriate candidates here. + However, the cost of checking them strictly here is too high, then we + delay these checking for prune_impossible_nodes(). */ + +static reg_errcode_t +internal_function +get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int subexp_num, sub_top_idx; + const char *buf = (const char *) re_string_get_buffer (&mctx->input); + /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ + int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); + if (cache_idx != -1) + { + const struct re_backref_cache_entry *entry + = mctx->bkref_ents + cache_idx; + do + if (entry->node == bkref_node) + return REG_NOERROR; /* We already checked it. */ + while (entry++->more); + } + + subexp_num = dfa->nodes[bkref_node].opr.idx; + + /* For each sub expression */ + for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) + { + reg_errcode_t err; + re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; + re_sub_match_last_t *sub_last; + int sub_last_idx, sl_str, bkref_str_off; + + if (dfa->nodes[sub_top->node].opr.idx != subexp_num) + continue; /* It isn't related. */ + + sl_str = sub_top->str_idx; + bkref_str_off = bkref_str_idx; + /* At first, check the last node of sub expressions we already + evaluated. 
*/ + for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) + { + int sl_str_diff; + sub_last = sub_top->lasts[sub_last_idx]; + sl_str_diff = sub_last->str_idx - sl_str; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_diff > 0) + { + if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) + { + /* Not enough chars for a successful match. */ + if (bkref_str_off + sl_str_diff > mctx->input.len) + break; + + err = clean_state_log_if_needed (mctx, + bkref_str_off + + sl_str_diff); + if (BE (err != REG_NOERROR, 0)) + return err; + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) + /* We don't need to search this sub expression any more. */ + break; + } + bkref_str_off += sl_str_diff; + sl_str += sl_str_diff; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + + /* Reload buf, since the preceding call might have reallocated + the buffer. */ + buf = (const char *) re_string_get_buffer (&mctx->input); + + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (sub_last_idx < sub_top->nlasts) + continue; + if (sub_last_idx > 0) + ++sl_str; + /* Then, search for the other last nodes of the sub expression. */ + for (; sl_str <= bkref_str_idx; ++sl_str) + { + int cls_node, sl_str_off; + const re_node_set *nodes; + sl_str_off = sl_str - sub_top->str_idx; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_off > 0) + { + if (BE (bkref_str_off >= mctx->input.valid_len, 0)) + { + /* If we are at the end of the input, we cannot match. 
*/ + if (bkref_str_off >= mctx->input.len) + break; + + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (buf [bkref_str_off++] != buf[sl_str - 1]) + break; /* We don't need to search this sub expression + any more. */ + } + if (mctx->state_log[sl_str] == NULL) + continue; + /* Does this state have a ')' of the sub expression? */ + nodes = &mctx->state_log[sl_str]->nodes; + cls_node = find_subexp_node (dfa, nodes, subexp_num, + OP_CLOSE_SUBEXP); + if (cls_node == -1) + continue; /* No. */ + if (sub_top->path == NULL) + { + sub_top->path = calloc (sizeof (state_array_t), + sl_str - sub_top->str_idx + 1); + if (sub_top->path == NULL) + return REG_ESPACE; + } + /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node + in the current context? */ + err = check_arrival (mctx, sub_top->path, sub_top->node, + sub_top->str_idx, cls_node, sl_str, + OP_CLOSE_SUBEXP); + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); + if (BE (sub_last == NULL, 0)) + return REG_ESPACE; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + /* Reload buf, since the preceding call might have reallocated + the buffer; the next iteration indexes buf again. */ + buf = (const char *) re_string_get_buffer (&mctx->input); + if (err == REG_NOMATCH) + continue; + /* Propagate hard errors (e.g. REG_ESPACE) instead of silently + dropping them and reporting REG_NOERROR to the caller. */ + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +/* Helper functions for get_subexp(). */ + +/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. + If it can arrive, register the sub expression expressed with SUB_TOP + and SUB_LAST. */ + +static reg_errcode_t +internal_function +get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, int bkref_node, int bkref_str) +{ + reg_errcode_t err; + int to_idx; + /* Can the subexpression arrive the back reference?
*/ + err = check_arrival (mctx, &sub_last->path, sub_last->node, + sub_last->str_idx, bkref_node, bkref_str, + OP_OPEN_SUBEXP); + if (err != REG_NOERROR) + return err; + err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, + sub_last->str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; + return clean_state_log_if_needed (mctx, to_idx); +} + +/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX. + Search '(' if FL_OPEN, or search ')' otherwise. + TODO: This function isn't efficient... + Because there might be more than one nodes whose types are + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + +static int +internal_function +find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) +{ + int cls_idx; + for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) + { + int cls_node = nodes->elems[cls_idx]; + const re_token_t *node = dfa->nodes + cls_node; + if (node->type == type + && node->opr.idx == subexp_idx) + return cls_node; + } + return -1; +} + +/* Check whether the node TOP_NODE at TOP_STR can arrive to the node + LAST_NODE at LAST_STR. We record the path onto PATH since it will be + heavily reused. + Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ + +static reg_errcode_t +internal_function +check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, + int top_str, int last_node, int last_str, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + int subexp_num, backup_cur_idx, str_idx, null_cnt; + re_dfastate_t *cur_state = NULL; + re_node_set *cur_nodes, next_nodes; + re_dfastate_t **backup_state_log; + unsigned int context; + + subexp_num = dfa->nodes[top_node].opr.idx; + /* Extend the buffer if we need. 
*/ + if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) + { + re_dfastate_t **new_array; + int old_alloc = path->alloc; + path->alloc += last_str + mctx->max_mb_elem_len + 1; + new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); + if (BE (new_array == NULL, 0)) + { + path->alloc = old_alloc; + return REG_ESPACE; + } + path->array = new_array; + memset (new_array + old_alloc, '\0', + sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); + } + + str_idx = path->next_idx ? 0 : top_str; + + /* Temporary modify MCTX. */ + backup_state_log = mctx->state_log; + backup_cur_idx = mctx->input.cur_idx; + mctx->state_log = path->array; + mctx->input.cur_idx = str_idx; + + /* Setup initial node set. */ + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + if (str_idx == top_str) + { + err = re_node_set_init_1 (&next_nodes, top_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + else + { + cur_state = mctx->state_log[str_idx]; + if (cur_state && cur_state->has_backref) + { + err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + re_node_set_init_empty (&next_nodes); + } + if (str_idx == top_str || (cur_state && cur_state->has_backref)) + { + if (next_nodes.nelem) + { + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + } + + for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) + { + re_node_set_empty (&next_nodes); + if 
(mctx->state_log[str_idx + 1]) + { + err = re_node_set_merge (&next_nodes, + &mctx->state_log[str_idx + 1]->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + if (cur_state) + { + err = check_arrival_add_next_nodes (mctx, str_idx, + &cur_state->non_eps_nodes, + &next_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + ++str_idx; + if (next_nodes.nelem) + { + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + null_cnt = cur_state == NULL ? null_cnt + 1 : 0; + } + re_node_set_free (&next_nodes); + cur_nodes = (mctx->state_log[last_str] == NULL ? NULL + : &mctx->state_log[last_str]->nodes); + path->next_idx = str_idx; + + /* Fix MCTX. */ + mctx->state_log = backup_state_log; + mctx->input.cur_idx = backup_cur_idx; + + /* Then check the current node set has the node LAST_NODE. */ + if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) + return REG_NOERROR; + + return REG_NOMATCH; +} + +/* Helper functions for check_arrival. */ + +/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them + to NEXT_NODES. + TODO: This function is similar to the functions transit_state*(), + however this function has many additional works. + Can't we unify them? 
*/ + +static reg_errcode_t +internal_function +check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, + re_node_set *cur_nodes, re_node_set *next_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + int result; + int cur_idx; + reg_errcode_t err = REG_NOERROR; + re_node_set union_set; + re_node_set_init_empty (&union_set); + for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) + { + int naccepted = 0; + int cur_node = cur_nodes->elems[cur_idx]; +#ifdef DEBUG + re_token_type_t type = dfa->nodes[cur_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[cur_node].accept_mb) + { + naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, + str_idx); + if (naccepted > 1) + { + re_dfastate_t *dest_state; + int next_node = dfa->nexts[cur_node]; + int next_idx = str_idx + naccepted; + dest_state = mctx->state_log[next_idx]; + re_node_set_empty (&union_set); + if (dest_state) + { + err = re_node_set_merge (&union_set, &dest_state->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + result = re_node_set_insert (&union_set, next_node); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + mctx->state_log[next_idx] = re_acquire_state (&err, dfa, + &union_set); + if (BE (mctx->state_log[next_idx] == NULL + && err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + } +#endif /* RE_ENABLE_I18N */ + if (naccepted + || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) + { + result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + } + } + re_node_set_free (&union_set); + return REG_NOERROR; +} + +/* For all the nodes in CUR_NODES, add the epsilon closures of them to + CUR_NODES, however exclude the nodes which are: + - inside the sub expression whose 
number is EX_SUBEXP, if FL_OPEN. + - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. +*/ + +static reg_errcode_t +internal_function +check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, + int ex_subexp, int type) +{ + reg_errcode_t err; + int idx, outside_node; + re_node_set new_nodes; +#ifdef DEBUG + assert (cur_nodes->nelem); +#endif + err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return err; + /* Create a new node set NEW_NODES with the nodes which are epsilon + closures of the node in CUR_NODES. */ + + for (idx = 0; idx < cur_nodes->nelem; ++idx) + { + int cur_node = cur_nodes->elems[idx]; + const re_node_set *eclosure = dfa->eclosures + cur_node; + outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); + if (outside_node == -1) + { + /* There are no problematic nodes, just merge them. */ + err = re_node_set_merge (&new_nodes, eclosure); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + else + { + /* There are problematic nodes, re-calculate incrementally. */ + err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + } + re_node_set_free (cur_nodes); + *cur_nodes = new_nodes; + return REG_NOERROR; +} + +/* Helper function for check_arrival_expand_ecl. + Check incrementally the epsilon closure of TARGET, and if it isn't + problematic append it to DST_NODES. 
*/ + +static reg_errcode_t +internal_function +check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, + int target, int ex_subexp, int type) +{ + int cur_node; + for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) + { + int err; + + if (dfa->nodes[cur_node].type == type + && dfa->nodes[cur_node].opr.idx == ex_subexp) + { + if (type == OP_CLOSE_SUBEXP) + { + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + } + break; + } + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + if (dfa->edests[cur_node].nelem == 0) + break; + if (dfa->edests[cur_node].nelem == 2) + { + err = check_arrival_expand_ecl_sub (dfa, dst_nodes, + dfa->edests[cur_node].elems[1], + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + return err; + } + cur_node = dfa->edests[cur_node].elems[0]; + } + return REG_NOERROR; +} + + +/* For all the back references in the current state, calculate the + destination of the back references by the appropriate entry + in MCTX->BKREF_ENTS. */ + +static reg_errcode_t +internal_function +expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, + int cur_str, int subexp_num, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int cache_idx_start = search_cur_bkref_entry (mctx, cur_str); + struct re_backref_cache_entry *ent; + + if (cache_idx_start == -1) + return REG_NOERROR; + + restart: + ent = mctx->bkref_ents + cache_idx_start; + do + { + int to_idx, next_node; + + /* Is this entry ENT is appropriate? */ + if (!re_node_set_contains (cur_nodes, ent->node)) + continue; /* No. */ + + to_idx = cur_str + ent->subexp_to - ent->subexp_from; + /* Calculate the destination of the back reference, and append it + to MCTX->STATE_LOG. */ + if (to_idx == cur_str) + { + /* The backreference did epsilon transit, we must re-check all the + node in the current state. 
*/ + re_node_set new_dests; + reg_errcode_t err2, err3; + next_node = dfa->edests[ent->node].elems[0]; + if (re_node_set_contains (cur_nodes, next_node)) + continue; + err = re_node_set_init_1 (&new_dests, next_node); + err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); + err3 = re_node_set_merge (cur_nodes, &new_dests); + re_node_set_free (&new_dests); + if (BE (err != REG_NOERROR || err2 != REG_NOERROR + || err3 != REG_NOERROR, 0)) + { + err = (err != REG_NOERROR ? err + : (err2 != REG_NOERROR ? err2 : err3)); + return err; + } + /* TODO: It is still inefficient... */ + goto restart; + } + else + { + re_node_set union_set; + next_node = dfa->nexts[ent->node]; + if (mctx->state_log[to_idx]) + { + int ret; + if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, + next_node)) + continue; + err = re_node_set_init_copy (&union_set, + &mctx->state_log[to_idx]->nodes); + ret = re_node_set_insert (&union_set, next_node); + if (BE (err != REG_NOERROR || ret < 0, 0)) + { + re_node_set_free (&union_set); + err = err != REG_NOERROR ? err : REG_ESPACE; + return err; + } + } + else + { + err = re_node_set_init_1 (&union_set, next_node); + if (BE (err != REG_NOERROR, 0)) + return err; + } + mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); + re_node_set_free (&union_set); + if (BE (mctx->state_log[to_idx] == NULL + && err != REG_NOERROR, 0)) + return err; + } + } + while (ent++->more); + return REG_NOERROR; +} + +/* Build the transition table for STATE. + On success either STATE->trtable (SBC_MAX entries) or + STATE->word_trtable (2 * SBC_MAX entries) is allocated and filled in. + Return 1 if succeeded, otherwise return 0 (the return type is int, + not a pointer). */ + +static int +internal_function +build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) +{ + reg_errcode_t err; + int i, j, ch, need_word_trtable = 0; + bitset_word_t elem, mask; + bool dests_node_malloced = false; + bool dest_states_malloced = false; + int ndests; /* Number of the destination states from `state'.
*/ + re_dfastate_t **trtable; + re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; + re_node_set follows, *dests_node; + bitset_t *dests_ch; + bitset_t acceptable; + + struct dests_alloc + { + re_node_set dests_node[SBC_MAX]; + bitset_t dests_ch[SBC_MAX]; + } *dests_alloc; + + /* We build DFA states which corresponds to the destination nodes + from `state'. `dests_node[i]' represents the nodes which i-th + destination state contains, and `dests_ch[i]' represents the + characters which i-th destination state accepts. */ + if (__libc_use_alloca (sizeof (struct dests_alloc))) + dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); + else + { + dests_alloc = re_malloc (struct dests_alloc, 1); + if (BE (dests_alloc == NULL, 0)) + return 0; + dests_node_malloced = true; + } + dests_node = dests_alloc->dests_node; + dests_ch = dests_alloc->dests_ch; + + /* Initialize transiton table. */ + state->word_trtable = state->trtable = NULL; + + /* At first, group all nodes belonging to `state' into several + destinations. */ + ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); + if (BE (ndests <= 0, 0)) + { + if (dests_node_malloced) + free (dests_alloc); + /* Return 0 in case of an error, 1 otherwise. 
*/ + if (ndests == 0) + { + state->trtable = (re_dfastate_t **) + calloc (sizeof (re_dfastate_t *), SBC_MAX); + /* Report allocation failure: returning 1 with a NULL table would + claim a transition table exists when it does not. */ + if (BE (state->trtable == NULL, 0)) + return 0; + return 1; + } + return 0; + } + + err = re_node_set_alloc (&follows, ndests + 1); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX + + ndests * 3 * sizeof (re_dfastate_t *))) + dest_states = (re_dfastate_t **) + alloca (ndests * 3 * sizeof (re_dfastate_t *)); + else + { + dest_states = (re_dfastate_t **) + malloc (ndests * 3 * sizeof (re_dfastate_t *)); + if (BE (dest_states == NULL, 0)) + { +out_free: + if (dest_states_malloced) + free (dest_states); + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + if (dests_node_malloced) + free (dests_alloc); + return 0; + } + dest_states_malloced = true; + } + dest_states_word = dest_states + ndests; + dest_states_nl = dest_states_word + ndests; + bitset_empty (acceptable); + + /* Then build the states for all destinations. */ + for (i = 0; i < ndests; ++i) + { + int next_node; + re_node_set_empty (&follows); + /* Merge the follows of this destination states. */ + for (j = 0; j < dests_node[i].nelem; ++j) + { + next_node = dfa->nexts[dests_node[i].elems[j]]; + if (next_node != -1) + { + err = re_node_set_merge (&follows, dfa->eclosures + next_node); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + } + } + dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); + if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + /* If the new state has context constraint, + build appropriate states for these contexts.
*/ + if (dest_states[i]->has_constraint) + { + dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_WORD); + if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + + if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) + need_word_trtable = 1; + + dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_NEWLINE); + if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + } + else + { + dest_states_word[i] = dest_states[i]; + dest_states_nl[i] = dest_states[i]; + } + bitset_merge (acceptable, dests_ch[i]); + } + + if (!BE (need_word_trtable, 0)) + { + /* We don't care about whether the following character is a word + character, or we are in a single-byte character set so we can + discern by looking at the character code: allocate a + 256-entry transition table. */ + trtable = state->trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + if (dfa->word_char[i] & mask) + trtable[ch] = dest_states_word[j]; + else + trtable[ch] = dest_states[j]; + } + } + else + { + /* We care about whether the following character is a word + character, and we are in a multi-byte character set: discern + by looking at the character code: build two 256-entry + transition tables, one starting at trtable[0] and one + starting at trtable[SBC_MAX]. 
*/ + trtable = state->word_trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + trtable[ch] = dest_states[j]; + trtable[ch + SBC_MAX] = dest_states_word[j]; + } + } + + /* new line */ + if (bitset_contain (acceptable, NEWLINE_CHAR)) + { + /* The current state accepts newline character. */ + for (j = 0; j < ndests; ++j) + if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) + { + /* j-th destination accepts newline character. */ + trtable[NEWLINE_CHAR] = dest_states_nl[j]; + if (need_word_trtable) + trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; + /* There must be only one destination which accepts + newline. See group_nodes_into_DFAstates. */ + break; + } + } + + if (dest_states_malloced) + free (dest_states); + + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + + if (dests_node_malloced) + free (dests_alloc); + + return 1; +} + +/* Group all nodes belonging to STATE into several destinations. + Then for all destinations, set the nodes belonging to the destination + to DESTS_NODE[i] and set the characters accepted by the destination + to DESTS_CH[i]. This function returns the number of destinations, + or -1 if a node set operation fails. */ + +static int +internal_function +group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, + re_node_set *dests_node, bitset_t *dests_ch) +{ + reg_errcode_t err; + int result; + int i, j, k; + int ndests; /* Number of the destinations from `state'. */ + bitset_t accepts; /* Characters a node can accept.
*/ + const re_node_set *cur_nodes = &state->nodes; + bitset_empty (accepts); + ndests = 0; + + /* For all the nodes belonging to `state', */ + for (i = 0; i < cur_nodes->nelem; ++i) + { + re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + /* Enumerate all single byte character this node can accept. */ + if (type == CHARACTER) + bitset_set (accepts, node->opr.c); + else if (type == SIMPLE_BRACKET) + { + bitset_merge (accepts, node->opr.sbcset); + } + else if (type == OP_PERIOD) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + bitset_merge (accepts, dfa->sb_char); + else +#endif + bitset_set_all (accepts); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#ifdef RE_ENABLE_I18N + else if (type == OP_UTF8_PERIOD) + { + memset (accepts, '\xff', sizeof (bitset_t) / 2); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#endif + else + continue; + + /* Check the `accepts' and sift the characters which are not + match it the context. 
*/ + if (constraint) + { + if (constraint & NEXT_NEWLINE_CONSTRAINT) + { + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); + bitset_empty (accepts); + if (accepts_newline) + bitset_set (accepts, NEWLINE_CHAR); + else + continue; + } + if (constraint & NEXT_ENDBUF_CONSTRAINT) + { + bitset_empty (accepts); + continue; + } + + if (constraint & NEXT_WORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && !node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= dfa->word_char[j]); + if (!any_set) + continue; + } + if (constraint & NEXT_NOTWORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~dfa->word_char[j]); + if (!any_set) + continue; + } + } + + /* Then divide `accepts' into DFA states, or create a new + state. Above, we make sure that accepts is not empty. */ + for (j = 0; j < ndests; ++j) + { + bitset_t intersec; /* Intersection sets, see below. */ + bitset_t remains; + /* Flags, see below. */ + bitset_word_t has_intersec, not_subset, not_consumed; + + /* Optimization, skip if this state doesn't accept the character. */ + if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) + continue; + + /* Enumerate the intersection set of this state and `accepts'. */ + has_intersec = 0; + for (k = 0; k < BITSET_WORDS; ++k) + has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; + /* And skip if the intersection set is empty. 
*/ + if (!has_intersec) + continue; + + /* Then check if this state is a subset of `accepts'. */ + not_subset = not_consumed = 0; + for (k = 0; k < BITSET_WORDS; ++k) + { + not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; + not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; + } + + /* If this state isn't a subset of `accepts', create a + new group state, which has the `remains'. */ + if (not_subset) + { + bitset_copy (dests_ch[ndests], remains); + bitset_copy (dests_ch[j], intersec); + err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + } + + /* Put the position in the current group. */ + result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + if (BE (result < 0, 0)) + goto error_return; + + /* If all characters are consumed, go to next node. */ + if (!not_consumed) + break; + } + /* Some characters remain, create a new group. */ + if (j == ndests) + { + bitset_copy (dests_ch[ndests], accepts); + err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + bitset_empty (accepts); + } + } + return ndests; + error_return: + for (j = 0; j < ndests; ++j) + re_node_set_free (dests_node + j); + return -1; +} + +#ifdef RE_ENABLE_I18N +/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. + Return the number of the bytes the node accepts. + STR_IDX is the current index of the input string. + + This function handles the nodes which can accept one character, or + one collating element like '.', '[a-z]', opposite to the other nodes + can only accept one byte. 
*/ + +static int +internal_function +check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int str_idx) +{ + const re_token_t *node = dfa->nodes + node_idx; + int char_len, elem_len; + int i; + + if (BE (node->type == OP_UTF8_PERIOD, 0)) + { + unsigned char c = re_string_byte_at (input, str_idx), d; + if (BE (c < 0xc2, 1)) + return 0; + + if (str_idx + 2 > input->len) + return 0; + + d = re_string_byte_at (input, str_idx + 1); + if (c < 0xe0) + return (d < 0x80 || d > 0xbf) ? 0 : 2; + else if (c < 0xf0) + { + char_len = 3; + if (c == 0xe0 && d < 0xa0) + return 0; + } + else if (c < 0xf8) + { + char_len = 4; + if (c == 0xf0 && d < 0x90) + return 0; + } + else if (c < 0xfc) + { + char_len = 5; + if (c == 0xf8 && d < 0x88) + return 0; + } + else if (c < 0xfe) + { + char_len = 6; + if (c == 0xfc && d < 0x84) + return 0; + } + else + return 0; + + if (str_idx + char_len > input->len) + return 0; + + for (i = 1; i < char_len; ++i) + { + d = re_string_byte_at (input, str_idx + i); + if (d < 0x80 || d > 0xbf) + return 0; + } + return char_len; + } + + char_len = re_string_char_size_at (input, str_idx); + if (node->type == OP_PERIOD) + { + if (char_len <= 1) + return 0; + /* FIXME: I don't think this if is needed, as both '\n' + and '\0' are char_len == 1. */ + /* '.' accepts any one character except the following two cases. 
*/ + if ((!(dfa->syntax & RE_DOT_NEWLINE) && + re_string_byte_at (input, str_idx) == '\n') || + ((dfa->syntax & RE_DOT_NOT_NULL) && + re_string_byte_at (input, str_idx) == '\0')) + return 0; + return char_len; + } + + elem_len = re_string_elem_size_at (input, str_idx); + if ((elem_len <= 1 && char_len <= 1) || char_len == 0) + return 0; + + if (node->type == COMPLEX_BRACKET) + { + const re_charset_t *cset = node->opr.mbcset; +# ifdef _LIBC + const unsigned char *pin + = ((const unsigned char *) re_string_get_buffer (input) + str_idx); + int j; + uint32_t nrules; +# endif /* _LIBC */ + int match_len = 0; + wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) + ? re_string_wchar_at (input, str_idx) : 0); + + /* match with multibyte character? */ + for (i = 0; i < cset->nmbchars; ++i) + if (wc == cset->mbchars[i]) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + /* match with character_class? */ + for (i = 0; i < cset->nchar_classes; ++i) + { + wctype_t wt = cset->char_classes[i]; + if (__iswctype (wc, wt)) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + +# ifdef _LIBC + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + unsigned int in_collseq = 0; + const int32_t *table, *indirect; + const unsigned char *weights, *extra; + const char *collseqwc; + /* This #include defines a local function! */ +# include <locale/weight.h> + + /* match with collating_symbol? */ + if (cset->ncoll_syms) + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + for (i = 0; i < cset->ncoll_syms; ++i) + { + const unsigned char *coll_sym = extra + cset->coll_syms[i]; + /* Compare the length of input collating element and + the length of current collating element. */ + if (*coll_sym != elem_len) + continue; + /* Compare each bytes. */ + for (j = 0; j < *coll_sym; j++) + if (pin[j] != coll_sym[1 + j]) + break; + if (j == *coll_sym) + { + /* Match if every bytes is equal.
*/ + match_len = j; + goto check_node_accept_bytes_match; + } + } + + if (cset->nranges) + { + if (elem_len <= char_len) + { + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + in_collseq = __collseq_table_lookup (collseqwc, wc); + } + else + in_collseq = find_collation_sequence_value (pin, elem_len); + } + /* match with range expression? */ + for (i = 0; i < cset->nranges; ++i) + if (cset->range_starts[i] <= in_collseq + && in_collseq <= cset->range_ends[i]) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + + /* match with equivalence_class? */ + if (cset->nequiv_classes) + { + const unsigned char *cp = pin; + table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); + int32_t idx = findidx (&cp); + if (idx > 0) + for (i = 0; i < cset->nequiv_classes; ++i) + { + int32_t equiv_class_idx = cset->equiv_classes[i]; + size_t weight_len = weights[idx & 0xffffff]; + if (weight_len == weights[equiv_class_idx & 0xffffff] + && (idx >> 24) == (equiv_class_idx >> 24)) + { + int cnt = 0; + + idx &= 0xffffff; + equiv_class_idx &= 0xffffff; + + while (cnt <= weight_len + && (weights[equiv_class_idx + 1 + cnt] + == weights[idx + 1 + cnt])) + ++cnt; + if (cnt > weight_len) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + } + } + } + } + else +# endif /* _LIBC */ + { + /* match with range expression? 
*/ +#if __GNUC__ >= 2 + wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; +#else + wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + cmp_buf[2] = wc; +#endif + for (i = 0; i < cset->nranges; ++i) + { + cmp_buf[0] = cset->range_starts[i]; + cmp_buf[4] = cset->range_ends[i]; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + } + check_node_accept_bytes_match: + if (!cset->non_match) + return match_len; + else + { + if (match_len > 0) + return 0; + else + return (elem_len > char_len) ? elem_len : char_len; + } + } + return 0; +} + +# ifdef _LIBC +static unsigned int +internal_function +find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) +{ + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules == 0) + { + if (mbs_len == 1) + { + /* No valid character. Match it as a single byte character. */ + const unsigned char *collseq = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + return collseq[mbs[0]]; + } + return UINT_MAX; + } + else + { + int32_t idx; + const unsigned char *extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + int32_t extrasize = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; + + for (idx = 0; idx < extrasize;) + { + int mbs_cnt, found = 0; + int32_t elem_mbs_len; + /* Skip the name of collating element name. */ + idx = idx + extra[idx] + 1; + elem_mbs_len = extra[idx++]; + if (mbs_len == elem_mbs_len) + { + for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) + if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) + break; + if (mbs_cnt == elem_mbs_len) + /* Found the entry. */ + found = 1; + } + /* Skip the byte sequence of the collating element. */ + idx += elem_mbs_len; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the collation sequence value. 
*/ + idx += sizeof (uint32_t); + /* Skip the wide char sequence of the collating element. */ + idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + /* If we found the entry, return the sequence value. */ + if (found) + return *(uint32_t *) (extra + idx); + /* Skip the collation sequence value. */ + idx += sizeof (uint32_t); + } + return UINT_MAX; + } +} +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ + +/* Check whether the node accepts the byte which is IDX-th + byte of the INPUT. */ + +static int +internal_function +check_node_accept (const re_match_context_t *mctx, const re_token_t *node, + int idx) +{ + unsigned char ch; + ch = re_string_byte_at (&mctx->input, idx); + switch (node->type) + { + case CHARACTER: + if (node->opr.c != ch) + return 0; + break; + + case SIMPLE_BRACKET: + if (!bitset_contain (node->opr.sbcset, ch)) + return 0; + break; + +#ifdef RE_ENABLE_I18N + case OP_UTF8_PERIOD: + if (ch >= 0x80) + return 0; + /* FALLTHROUGH */ +#endif + case OP_PERIOD: + if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) + || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) + return 0; + break; + + default: + return 0; + } + + if (node->constraint) + { + /* The node has constraints. Check whether the current context + satisfies the constraints. */ + unsigned int context = re_string_context_at (&mctx->input, idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + return 0; + } + + return 1; +} + +/* Extend the buffers, if the buffers have run out. */ + +static reg_errcode_t +internal_function +extend_buffers (re_match_context_t *mctx) +{ + reg_errcode_t ret; + re_string_t *pstr = &mctx->input; + + /* Double the lengthes of the buffers. */ + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + if (mctx->state_log != NULL) + { + /* And double the length of state_log. */ + /* XXX We have no indication of the size of this buffer. 
If this + allocation fail we have no indication that the state_log array + does not have the right size. */ + re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, + pstr->bufs_len + 1); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->state_log = new_array; + } + + /* Then reconstruct the buffers. */ + if (pstr->icase) + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + } + return REG_NOERROR; +} + + +/* Functions for matching context. */ + +/* Initialize MCTX. */ + +static reg_errcode_t +internal_function +match_ctx_init (re_match_context_t *mctx, int eflags, int n) +{ + mctx->eflags = eflags; + mctx->match_last = -1; + if (n > 0) + { + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); + mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); + if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) + return REG_ESPACE; + } + /* Already zero-ed by the caller. + else + mctx->bkref_ents = NULL; + mctx->nbkref_ents = 0; + mctx->nsub_tops = 0; */ + mctx->abkref_ents = n; + mctx->max_mb_elem_len = 1; + mctx->asub_tops = n; + return REG_NOERROR; +} + +/* Clean the entries which depend on the current input in MCTX. + This function must be invoked when the matcher changes the start index + of the input, or changes the input string. 
*/
+
+/* Walk MCTX->SUB_TOPS[0 .. NSUB_TOPS - 1] and free every entry together
+   with its LASTS array (and each element's path array) and its PATH.
+   Afterwards NSUB_TOPS and NBKREF_ENTS are reset to zero; the SUB_TOPS and
+   BKREF_ENTS arrays themselves are kept.  */
+
+static void
+internal_function
+match_ctx_clean (re_match_context_t *mctx)
+{
+  int top_idx = 0;
+
+  while (top_idx < mctx->nsub_tops)
+    {
+      re_sub_match_top_t *subtop = mctx->sub_tops[top_idx++];
+      int last_idx;
+
+      /* Every element of LASTS owns its own path array.  */
+      for (last_idx = 0; last_idx < subtop->nlasts; last_idx++)
+        {
+          re_sub_match_last_t *sublast = subtop->lasts[last_idx];
+          re_free (sublast->path.array);
+          re_free (sublast);
+        }
+      re_free (subtop->lasts);
+
+      /* PATH is optional.  */
+      if (subtop->path != NULL)
+        {
+          re_free (subtop->path->array);
+          re_free (subtop->path);
+        }
+
+      free (subtop);
+    }
+
+  /* Nothing that depends on the previous input remains.  */
+  mctx->nbkref_ents = 0;
+  mctx->nsub_tops = 0;
+}
+
+/* Release everything owned by MCTX: the per-input entries (through
+   match_ctx_clean) and then the SUB_TOPS and BKREF_ENTS arrays.  */
+
+static void
+internal_function
+match_ctx_free (re_match_context_t *mctx)
+{
+  /* The entries must go first, while MCTX->SUB_TOPS is still valid.  */
+  match_ctx_clean (mctx);
+  re_free (mctx->bkref_ents);
+  re_free (mctx->sub_tops);
+}
+
+/* Add a new backreference entry to MCTX.
+   Note that we assume that the caller never calls this function with a
+   duplicate entry, and always calls it with a STR_IDX which isn't smaller
+   than that of any existing entry.
+*/
+
+/* NODE and STR_IDX are stored verbatim in the new entry; FROM and TO are
+   stored as its SUBEXP_FROM and SUBEXP_TO.  MCTX->MAX_MB_ELEM_LEN is raised
+   to TO - FROM when that is larger.
+
+   Returns REG_NOERROR on success, or REG_ESPACE when the cache is full and
+   cannot be grown.  On REG_ESPACE the existing cache is left intact and is
+   still owned by MCTX, so match_ctx_free releases it exactly once.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
+                     int to)
+{
+  if (mctx->nbkref_ents >= mctx->abkref_ents)
+    {
+      struct re_backref_cache_entry *new_entry;
+      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
+                              mctx->abkref_ents * 2);
+      if (BE (new_entry == NULL, 0))
+        {
+          /* The old block is still valid and MCTX->BKREF_ENTS still points
+             at it.  Do not free it here: match_ctx_free frees
+             MCTX->BKREF_ENTS unconditionally, so freeing it now would leave
+             a dangling pointer that is freed a second time.  */
+          return REG_ESPACE;
+        }
+      mctx->bkref_ents = new_entry;
+      /* Zero ABKREF_ENTS entries starting at index NBKREF_ENTS, i.e. the
+         slots that were just added.  */
+      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
+              sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
+      mctx->abkref_ents *= 2;
+    }
+
+  /* Flag the previous entry when it was recorded at the same STR_IDX, so
+     that it is known to have a successor for that index.  */
+  if (mctx->nbkref_ents > 0
+      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
+    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
+
+  mctx->bkref_ents[mctx->nbkref_ents].node = node;
+  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+
+  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
+     If bit N is clear, means that this entry won't epsilon-transition to
+     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
+     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
+     such node.
+
+     A backreference does not epsilon-transition unless it is empty, so set
+     to all zeros if FROM != TO.  */
+  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
+    = (from == to ? ~0 : 0);
+
+  /* The new entry is the last one for its STR_IDX until another is added.  */
+  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
+  if (mctx->max_mb_elem_len < to - from)
+    mctx->max_mb_elem_len = to - from;
+  return REG_NOERROR;
+}
+
+/* Search for the first entry which has the same str_idx, or -1 if none is
+   found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.
*/
+
+/* Lower-bound binary search over MCTX->BKREF_ENTS[0 .. NBKREF_ENTS - 1]:
+   returns the smallest index whose STR_IDX equals STR_IDX, or -1 when no
+   entry has that STR_IDX.  */
+
+static int
+internal_function
+search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+{
+  const int count = mctx->nbkref_ents;
+  int lo = 0;
+  int hi = count;
+
+  /* Invariant: entries below LO have a smaller STR_IDX, entries at or
+     above HI do not.  */
+  while (lo < hi)
+    {
+      int probe = (lo + hi) / 2;
+      if (mctx->bkref_ents[probe].str_idx >= str_idx)
+        hi = probe;
+      else
+        lo = probe + 1;
+    }
+
+  if (lo >= count || mctx->bkref_ents[lo].str_idx != str_idx)
+    return -1;
+  return lo;
+}
+
+/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
+   at STR_IDX.  The SUB_TOPS array is doubled when it is full and a
+   zero-initialized entry is appended.  Returns REG_NOERROR on success and
+   REG_ESPACE when either allocation fails; NSUB_TOPS is only advanced on
+   success.  */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
+{
+  re_sub_match_top_t *top;
+
+#ifdef DEBUG
+  assert (mctx->sub_tops != NULL);
+  assert (mctx->asub_tops > 0);
+#endif
+
+  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
+    {
+      int grown = mctx->asub_tops * 2;
+      re_sub_match_top_t **tops = re_realloc (mctx->sub_tops,
+                                              re_sub_match_top_t *,
+                                              grown);
+      if (BE (tops == NULL, 0))
+        return REG_ESPACE;
+      mctx->asub_tops = grown;
+      mctx->sub_tops = tops;
+    }
+
+  /* The slot is written even when the allocation fails, but it is not
+     counted in NSUB_TOPS in that case.  */
+  top = calloc (1, sizeof (re_sub_match_top_t));
+  mctx->sub_tops[mctx->nsub_tops] = top;
+  if (BE (top == NULL, 0))
+    return REG_ESPACE;
+
+  top->node = node;
+  top->str_idx = str_idx;
+  mctx->nsub_tops++;
+  return REG_NOERROR;
+}
+
+/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
+   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.
*/ + +static re_sub_match_last_t * +internal_function +match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) +{ + re_sub_match_last_t *new_entry; + if (BE (subtop->nlasts == subtop->alasts, 0)) + { + int new_alasts = 2 * subtop->alasts + 1; + re_sub_match_last_t **new_array = re_realloc (subtop->lasts, + re_sub_match_last_t *, + new_alasts); + if (BE (new_array == NULL, 0)) + return NULL; + subtop->lasts = new_array; + subtop->alasts = new_alasts; + } + new_entry = calloc (1, sizeof (re_sub_match_last_t)); + if (BE (new_entry != NULL, 1)) + { + subtop->lasts[subtop->nlasts] = new_entry; + new_entry->node = node; + new_entry->str_idx = str_idx; + ++subtop->nlasts; + } + return new_entry; +} + +static void +internal_function +sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, int last_str_idx) +{ + sctx->sifted_states = sifted_sts; + sctx->limited_states = limited_sts; + sctx->last_node = last_node; + sctx->last_str_idx = last_str_idx; + re_node_set_init_empty (&sctx->limits); +} diff --git a/gnu_regex/README.txt b/gnu_regex/README.txt new file mode 100644 index 0000000..8fccbea --- /dev/null +++ b/gnu_regex/README.txt @@ -0,0 +1,5 @@ +These source files were taken from the GNU glibc-2.10.1 package. + + ftp://ftp.gnu.org/gnu/glibc/glibc-2.10.1.tar.bz2 + +Minor changes were made to eliminate compiler errors and warnings. diff --git a/gnu_regex/regcomp.c b/gnu_regex/regcomp.c new file mode 100644 index 0000000..1f3daf2 --- /dev/null +++ b/gnu_regex/regcomp.c @@ -0,0 +1,3818 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002,2003,2004,2005,2006,2007,2009 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, + size_t length, reg_syntax_t syntax); +static void re_compile_fastmap_iter (regex_t *bufp, + const re_dfastate_t *init_state, + char *fastmap); +static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); +#ifdef RE_ENABLE_I18N +static void free_charset (re_charset_t *cset); +#endif /* RE_ENABLE_I18N */ +static void free_workarea_compile (regex_t *preg); +static reg_errcode_t create_initial_state (re_dfa_t *dfa); +#ifdef RE_ENABLE_I18N +static void optimize_utf8 (re_dfa_t *dfa); +#endif +static reg_errcode_t analyze (regex_t *preg); +static reg_errcode_t preorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t postorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); +static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); +static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, + bin_tree_t *node); +static reg_errcode_t calc_first (void *extra, bin_tree_t *node); +static reg_errcode_t calc_next (void *extra, bin_tree_t *node); +static reg_errcode_t link_nfa_nodes (void 
*extra, bin_tree_t *node); +static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); +static int search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint); +static reg_errcode_t calc_eclosure (re_dfa_t *dfa); +static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, + int node, int root); +static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); +static int fetch_number (re_string_t *input, re_token_t *token, + reg_syntax_t syntax); +static int peek_token (re_token_t *token, re_string_t *input, + reg_syntax_t syntax) internal_function; +static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, + re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, + reg_errcode_t *err); +static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token, int token_len, + re_dfa_t *dfa, + reg_syntax_t syntax, + int accept_hyphen); +static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token); +#ifdef RE_ENABLE_I18N +static reg_errcode_t build_equiv_class (bitset_t sbcset, + re_charset_t 
*mbcset, + int *equiv_class_alloc, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + re_charset_t *mbcset, + int *char_class_alloc, + const unsigned char *class_name, + reg_syntax_t syntax); +#else /* not RE_ENABLE_I18N */ +static reg_errcode_t build_equiv_class (bitset_t sbcset, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + const unsigned char *class_name, + reg_syntax_t syntax); +#endif /* not RE_ENABLE_I18N */ +static bin_tree_t *build_charclass_op (re_dfa_t *dfa, + RE_TRANSLATE_TYPE trans, + const unsigned char *class_name, + const unsigned char *extra, + int non_match, reg_errcode_t *err); +static bin_tree_t *create_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + re_token_type_t type); +static bin_tree_t *create_token_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + const re_token_t *token); +static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); +static void free_token (re_token_t *node); +static reg_errcode_t free_tree (void *extra, bin_tree_t *node); +static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. + POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? 
*/ + +const char __re_error_msgid[] attribute_hidden = + { +#define REG_NOERROR_IDX 0 + gettext_noop ("Success") /* REG_NOERROR */ + "\0" +#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") + gettext_noop ("No match") /* REG_NOMATCH */ + "\0" +#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") + gettext_noop ("Invalid regular expression") /* REG_BADPAT */ + "\0" +#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") + gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ + "\0" +#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") + gettext_noop ("Invalid character class name") /* REG_ECTYPE */ + "\0" +#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") + gettext_noop ("Trailing backslash") /* REG_EESCAPE */ + "\0" +#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") + gettext_noop ("Invalid back reference") /* REG_ESUBREG */ + "\0" +#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") + gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ + "\0" +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") + gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ + "\0" +#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") + gettext_noop ("Unmatched \\{") /* REG_EBRACE */ + "\0" +#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") + gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ + "\0" +#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") + gettext_noop ("Invalid range end") /* REG_ERANGE */ + "\0" +#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") + gettext_noop ("Memory exhausted") /* REG_ESPACE */ + "\0" +#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") + gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ + "\0" +#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular 
expression") + gettext_noop ("Premature end of regular expression") /* REG_EEND */ + "\0" +#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") + gettext_noop ("Regular expression too big") /* REG_ESIZE */ + "\0" +#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") + gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ + }; + +const size_t __re_error_msgid_idx[] attribute_hidden = + { + REG_NOERROR_IDX, + REG_NOMATCH_IDX, + REG_BADPAT_IDX, + REG_ECOLLATE_IDX, + REG_ECTYPE_IDX, + REG_EESCAPE_IDX, + REG_ESUBREG_IDX, + REG_EBRACK_IDX, + REG_EPAREN_IDX, + REG_EBRACE_IDX, + REG_BADBR_IDX, + REG_ERANGE_IDX, + REG_ESPACE_IDX, + REG_BADRPT_IDX, + REG_EEND_IDX, + REG_ESIZE_IDX, + REG_ERPAREN_IDX + }; + +/* Entry points for GNU code. */ + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length LENGTH) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. */ + +const char * +re_compile_pattern (pattern, length, bufp) + const char *pattern; + size_t length; + struct re_pattern_buffer *bufp; +{ + reg_errcode_t ret; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub, unless RE_NO_SUB is set. */ + bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = re_compile_internal (bufp, pattern, length, re_syntax_options); + + if (!ret) + return NULL; + return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} +#ifdef _LIBC +weak_alias (__re_compile_pattern, re_compile_pattern) +#endif + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. 
Can
+   also be assigned to arbitrarily: each pattern buffer stores its own
+   syntax, so it can be changed between regex compilations.  */
+/* This has no initializer because initialized variables in Emacs
+   become read-only after dumping.  */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation.  This provides
+   for compatibility for various utilities which historically have
+   different, incompatible syntaxes.
+
+   The argument SYNTAX is a bit mask comprised of the various bits
+   defined in regex.h.  We return the old syntax.  */
+
+reg_syntax_t
+re_set_syntax (reg_syntax_t syntax)
+{
+  reg_syntax_t ret = re_syntax_options;
+
+  re_syntax_options = syntax;
+  return ret;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+/* Build BUFP->fastmap from the compiled pattern in BUFP->buffer.
+   The first SBC_MAX bytes of the fastmap are cleared, then the bytes that
+   can start a match are collected from the initial state and from each of
+   the word, newline and beginning-of-buffer initial states that differs
+   from it.  BUFP->fastmap must therefore point to at least SBC_MAX bytes.
+   Sets BUFP->fastmap_accurate and always returns 0.  */
+
+int
+re_compile_fastmap (struct re_pattern_buffer *bufp)
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  char *fastmap = bufp->fastmap;
+
+  memset (fastmap, '\0', sizeof (char) * SBC_MAX);
+  re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
+  /* Skip the context-specific states that alias the plain initial state.  */
+  if (dfa->init_state != dfa->init_state_word)
+    re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
+  if (dfa->init_state != dfa->init_state_nl)
+    re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
+  if (dfa->init_state != dfa->init_state_begbuf)
+    re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
+  bufp->fastmap_accurate = 1;
+  return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+/* Mark byte CH in FASTMAP; when ICASE is nonzero also mark tolower (CH).
+   CH is used directly as an array index.  */
+
+static inline void
+__attribute ((always_inline))
+re_set_fastmap (char *fastmap, int icase, int ch)
+{
+  fastmap[ch] = 1;
+  if (icase)
+    fastmap[tolower (ch)] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+   Compile fastmap for the initial_state INIT_STATE.
*/ + +static void +re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, + char *fastmap) +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + int node_cnt; + int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); + for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) + { + int node = init_state->nodes.elems[node_cnt]; + re_token_type_t type = dfa->nodes[node].type; + + if (type == CHARACTER) + { + re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); +#ifdef RE_ENABLE_I18N + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + unsigned char *buf = alloca (dfa->mb_cur_max), *p; + wchar_t wc; + mbstate_t state; + + p = buf; + *p++ = dfa->nodes[node].opr.c; + while (++node < dfa->nodes_len + && dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].mb_partial) + *p++ = dfa->nodes[node].opr.c; + memset (&state, '\0', sizeof (state)); + if (__mbrtowc (&wc, (const char *) buf, p - buf, + &state) == p - buf + && (__wcrtomb ((char *) buf, towlower (wc), &state) + != (size_t) -1)) + re_set_fastmap (fastmap, 0, buf[0]); + } +#endif + } + else if (type == SIMPLE_BRACKET) + { + int i, ch; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + { + int j; + bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (w & ((bitset_word_t) 1 << j)) + re_set_fastmap (fastmap, icase, ch); + } + } +#ifdef RE_ENABLE_I18N + else if (type == COMPLEX_BRACKET) + { + re_charset_t *cset = dfa->nodes[node].opr.mbcset; + int i; + +# ifdef _LIBC + /* See if we have to try all bytes which start multiple collation + elements. + e.g. In da_DK, we want to catch 'a' since "aa" is a valid + collation element, and don't catch 'b' since 'b' is + the only collation element which starts from 'b' (and + it is caught by SIMPLE_BRACKET). 
*/ + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0 + && (cset->ncoll_syms || cset->nranges)) + { + const int32_t *table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); + } +# endif /* _LIBC */ + + /* See if we have to start the match at all multibyte characters, + i.e. where we would not find an invalid sequence. This only + applies to multibyte character sets; for single byte character + sets, the SIMPLE_BRACKET again suffices. */ + if (dfa->mb_cur_max > 1 + && (cset->nchar_classes || cset->non_match +# ifdef _LIBC + || cset->nequiv_classes +# endif /* _LIBC */ + )) + { + unsigned char c = 0; + do + { + mbstate_t mbs; + memset (&mbs, 0, sizeof (mbs)); + if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2) + re_set_fastmap (fastmap, false, (int) c); + } + while (++c != 0); + } + + else + { + /* ... Else catch all bytes which can start the mbchars. */ + for (i = 0; i < cset->nmbchars; ++i) + { + char buf[256]; + mbstate_t state; + memset (&state, '\0', sizeof (state)); + if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) + != (size_t) -1) + re_set_fastmap (fastmap, false, *(unsigned char *) buf); + } + } + } + } +#endif /* RE_ENABLE_I18N */ + else if (type == OP_PERIOD +#ifdef RE_ENABLE_I18N + || type == OP_UTF8_PERIOD +#endif /* RE_ENABLE_I18N */ + || type == END_OF_RE) + { + memset (fastmap, '\1', sizeof (char) * SBC_MAX); + if (type == END_OF_RE) + bufp->can_be_null = 1; + return; + } + } +} + +/* Entry point for POSIX code. */ +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. 
Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' to an allocated space for the fastmap; + `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. + Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (preg, pattern, cflags) + regex_t *__restrict preg; + const char *__restrict pattern; + int cflags; +{ + reg_errcode_t ret; + reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED + : RE_SYNTAX_POSIX_BASIC); + + preg->buffer = NULL; + preg->allocated = 0; + preg->used = 0; + + /* Try to allocate space for the fastmap. */ + preg->fastmap = re_malloc (char, SBC_MAX); + if (BE (preg->fastmap == NULL, 0)) + return REG_ESPACE; + + syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. 
*/ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + preg->no_sub = !!(cflags & REG_NOSUB); + preg->translate = NULL; + + ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) + ret = REG_EPAREN; + + /* We have already checked preg->fastmap != NULL. */ + if (BE (ret == REG_NOERROR, 1)) + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. This function never fails in this implementation. */ + (void) re_compile_fastmap (preg); + else + { + /* Some error occurred while compiling the expression. */ + re_free (preg->fastmap); + preg->fastmap = NULL; + } + + return (int) ret; +} +#ifdef _LIBC +weak_alias (__regcomp, regcomp) +#endif + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror ( + int errcode, + const regex_t *__restrict preg, + char *__restrict errbuf, + size_t errbuf_size) +{ + const char *msg; + size_t msg_size; + + if (BE (errcode < 0 + || errcode >= (int) (sizeof (__re_error_msgid_idx) + / sizeof (__re_error_msgid_idx[0])), 0)) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); + + msg_size = strlen (msg) + 1; /* Includes the null. 
*/ + + if (BE (errbuf_size != 0, 1)) + { + if (BE (msg_size > errbuf_size, 0)) + { +#if defined HAVE_MEMPCPY || defined _LIBC + *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; +#else + memcpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; +#endif + } + else + memcpy (errbuf, msg, msg_size); + } + + return msg_size; +} +#ifdef _LIBC +weak_alias (__regerror, regerror) +#endif + + +#ifdef RE_ENABLE_I18N +/* This static array is used for the map to single-byte characters when + UTF-8 is used. Otherwise we would allocate memory just to initialize + it the same all the time. UTF-8 is the preferred encoding so this is + a worthwhile optimization. */ +static const bitset_t utf8_sb_map = +{ + /* Set the first 128 bits. */ + [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX +}; +#endif + + +static void +free_dfa_content (re_dfa_t *dfa) +{ + int i, j; + + if (dfa->nodes) + for (i = 0; i < dfa->nodes_len; ++i) + free_token (dfa->nodes + i); + re_free (dfa->nexts); + for (i = 0; i < dfa->nodes_len; ++i) + { + if (dfa->eclosures != NULL) + re_node_set_free (dfa->eclosures + i); + if (dfa->inveclosures != NULL) + re_node_set_free (dfa->inveclosures + i); + if (dfa->edests != NULL) + re_node_set_free (dfa->edests + i); + } + re_free (dfa->edests); + re_free (dfa->eclosures); + re_free (dfa->inveclosures); + re_free (dfa->nodes); + + if (dfa->state_table) + for (i = 0; i <= dfa->state_hash_mask; ++i) + { + struct re_state_table_entry *entry = dfa->state_table + i; + for (j = 0; j < entry->num; ++j) + { + re_dfastate_t *state = entry->array[j]; + free_state (state); + } + re_free (entry->array); + } + re_free (dfa->state_table); +#ifdef RE_ENABLE_I18N + if (dfa->sb_char != utf8_sb_map) + re_free (dfa->sb_char); +#endif + re_free (dfa->subexp_map); +#ifdef DEBUG + re_free (dfa->re_str); +#endif + + re_free (dfa); +} + + +/* Free dynamically allocated space used by PREG. 
*/ + +void +regfree (preg) + regex_t *preg; +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + if (BE (dfa != NULL, 1)) + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + + re_free (preg->fastmap); + preg->fastmap = NULL; + + re_free (preg->translate); + preg->translate = NULL; +} +#ifdef _LIBC +weak_alias (__regfree, regfree) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +# ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec above without link errors. */ +weak_function +# endif +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + char *fastmap; + + if (!s) + { + if (!re_comp_buf.buffer) + return gettext ("No previous regular expression"); + return 0; + } + + if (re_comp_buf.buffer) + { + fastmap = re_comp_buf.fastmap; + re_comp_buf.fastmap = NULL; + __regfree (&re_comp_buf); + memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); + re_comp_buf.fastmap = fastmap; + } + + if (re_comp_buf.fastmap == NULL) + { + re_comp_buf.fastmap = (char *) malloc (SBC_MAX); + if (re_comp_buf.fastmap == NULL) + return (char *) gettext (__re_error_msgid + + __re_error_msgid_idx[(int) REG_ESPACE]); + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. 
*/ + return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} + +#ifdef _LIBC +libc_freeres_fn (free_mem) +{ + __regfree (&re_comp_buf); +} +#endif + +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. + Compile the regular expression PATTERN, whose length is LENGTH. + SYNTAX indicate regular expression's syntax. */ + +static reg_errcode_t +re_compile_internal (regex_t *preg, const char * pattern, size_t length, + reg_syntax_t syntax) +{ + reg_errcode_t err = REG_NOERROR; + re_dfa_t *dfa; + re_string_t regexp; + + /* Initialize the pattern buffer. */ + preg->fastmap_accurate = 0; + preg->syntax = syntax; + preg->not_bol = preg->not_eol = 0; + preg->used = 0; + preg->re_nsub = 0; + preg->can_be_null = 0; + preg->regs_allocated = REGS_UNALLOCATED; + + /* Initialize the dfa. */ + dfa = (re_dfa_t *) preg->buffer; + if (BE (preg->allocated < sizeof (re_dfa_t), 0)) + { + /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. If ->buffer is NULL this + is a simple allocation. */ + dfa = re_realloc (preg->buffer, re_dfa_t, 1); + if (dfa == NULL) + return REG_ESPACE; + preg->allocated = sizeof (re_dfa_t); + preg->buffer = (unsigned char *) dfa; + } + preg->used = sizeof (re_dfa_t); + + err = init_dfa (dfa, length); + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } +#ifdef DEBUG + /* Note: length+1 will not overflow since it is checked in init_dfa. 
*/ + dfa->re_str = re_malloc (char, length + 1); + strncpy (dfa->re_str, pattern, length + 1); +#endif + + __libc_lock_init (dfa->lock); + + err = re_string_construct (®exp, pattern, length, preg->translate, + syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + { + re_compile_internal_free_return: + free_workarea_compile (preg); + re_string_destruct (®exp); + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } + + /* Parse the regular expression, and build a structure tree. */ + preg->re_nsub = 0; + dfa->str_tree = parse (®exp, preg, syntax, &err); + if (BE (dfa->str_tree == NULL, 0)) + goto re_compile_internal_free_return; + + /* Analyze the tree and create the nfa. */ + err = analyze (preg); + if (BE (err != REG_NOERROR, 0)) + goto re_compile_internal_free_return; + +#ifdef RE_ENABLE_I18N + /* If possible, do searching in single byte encoding to speed things up. */ + if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) + optimize_utf8 (dfa); +#endif + + /* Then create the initial state of the dfa. */ + err = create_initial_state (dfa); + + /* Release work areas. */ + free_workarea_compile (preg); + re_string_destruct (®exp); + + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + } + + return err; +} + +/* Initialize DFA. We use the length of the regular expression PAT_LEN + as the initial length of some arrays. */ + +static reg_errcode_t +init_dfa (re_dfa_t *dfa, size_t pat_len) +{ + unsigned int table_size; +#ifndef _LIBC + char *codeset_name; +#endif + + memset (dfa, '\0', sizeof (re_dfa_t)); + + /* Force allocation of str_tree_storage the first time. */ + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + + /* Avoid overflows. 
*/ + if (pat_len == SIZE_MAX) + return REG_ESPACE; + + dfa->nodes_alloc = pat_len + 1; + dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); + + /* table_size = 2 ^ ceil(log pat_len) */ + for (table_size = 1; ; table_size <<= 1) + if (table_size > pat_len) + break; + + dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); + dfa->state_hash_mask = table_size - 1; + + dfa->mb_cur_max = MB_CUR_MAX; +#ifdef _LIBC + if (dfa->mb_cur_max == 6 + && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) + dfa->is_utf8 = 1; + dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) + != 0); +#else +# ifdef HAVE_LANGINFO_CODESET + codeset_name = nl_langinfo (CODESET); +# else + codeset_name = getenv ("LC_ALL"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LC_CTYPE"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LANG"); + if (codeset_name == NULL) + codeset_name = ""; + else if (strchr (codeset_name, '.') != NULL) + codeset_name = strchr (codeset_name, '.') + 1; +# endif + + if (strcasecmp (codeset_name, "UTF-8") == 0 + || strcasecmp (codeset_name, "UTF8") == 0) + dfa->is_utf8 = 1; + + /* We check exhaustively in the loop below if this charset is a + superset of ASCII. */ + dfa->map_notascii = 0; +#endif + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + if (dfa->is_utf8) + dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; + else + { + int i, j, ch; + + dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); + if (BE (dfa->sb_char == NULL, 0)) + return REG_ESPACE; + + /* Set the bits corresponding to single byte chars. 
*/ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + { + wint_t wch = __btowc (ch); + if (wch != WEOF) + dfa->sb_char[i] |= (bitset_word_t) 1 << j; +# ifndef _LIBC + if (isascii (ch) && wch != ch) + dfa->map_notascii = 1; +# endif + } + } + } +#endif + + if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +/* Initialize WORD_CHAR table, which indicate which character is + "word". In this case "word" means that it is the word construction + character used by some operators like "\<", "\>", etc. */ + +static void +internal_function +init_word_char (re_dfa_t *dfa) +{ + int i, j, ch; + dfa->word_ops_used = 1; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (isalnum (ch) || ch == '_') + dfa->word_char[i] |= (bitset_word_t) 1 << j; +} + +/* Free the work area which are only used while compiling. */ + +static void +free_workarea_compile (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_storage_t *storage, *next; + for (storage = dfa->str_tree_storage; storage; storage = next) + { + next = storage->next; + re_free (storage); + } + dfa->str_tree_storage = NULL; + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + dfa->str_tree = NULL; + re_free (dfa->org_indices); + dfa->org_indices = NULL; +} + +/* Create initial states for all contexts. */ + +static reg_errcode_t +create_initial_state (re_dfa_t *dfa) +{ + int first, i; + reg_errcode_t err; + re_node_set init_nodes; + + /* Initial states have the epsilon closure of the node which is + the first node of the regular expression. */ + first = dfa->str_tree->first->node_idx; + dfa->init_node = first; + err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* The back-references which are in initial states can epsilon transit, + since in this case all of the subexpressions can be null. 
+ Then we add epsilon closures of the nodes which are the next nodes of + the back-references. */ + if (dfa->nbackref > 0) + for (i = 0; i < init_nodes.nelem; ++i) + { + int node_idx = init_nodes.elems[i]; + re_token_type_t type = dfa->nodes[node_idx].type; + + int clexp_idx; + if (type != OP_BACK_REF) + continue; + for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) + break; + } + if (clexp_idx == init_nodes.nelem) + continue; + + if (type == OP_BACK_REF) + { + int dest_idx = dfa->edests[node_idx].elems[0]; + if (!re_node_set_contains (&init_nodes, dest_idx)) + { + re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); + i = 0; + } + } + } + + /* It must be the first time to invoke acquire_state. */ + dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); + /* We don't check ERR here, since the initial state must not be NULL. */ + if (BE (dfa->init_state == NULL, 0)) + return err; + if (dfa->init_state->has_constraint) + { + dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_WORD); + dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_NEWLINE); + dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, + &init_nodes, + CONTEXT_NEWLINE + | CONTEXT_BEGBUF); + if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return err; + } + else + dfa->init_state_word = dfa->init_state_nl + = dfa->init_state_begbuf = dfa->init_state; + + re_node_set_free (&init_nodes); + return REG_NOERROR; +} + +#ifdef RE_ENABLE_I18N +/* If it is possible to do searching in single byte encoding instead of UTF-8 + to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change + DFA nodes where needed. 
*/ + +static void +optimize_utf8 (re_dfa_t *dfa) +{ + int node, i, mb_chars = 0, has_period = 0; + /* First pass: return without changing DFA if any node is an anchor + other than the line and buffer anchors, a COMPLEX_BRACKET, or a + SIMPLE_BRACKET with a bit set at 0x80 or above. Also note whether + non-ASCII characters or periods occur. */ + for (node = 0; node < dfa->nodes_len; ++node) + switch (dfa->nodes[node].type) + { + case CHARACTER: + if (dfa->nodes[node].opr.c >= 0x80) + mb_chars = 1; + break; + case ANCHOR: + switch (dfa->nodes[node].opr.ctx_type) + { + case LINE_FIRST: + case LINE_LAST: + case BUF_FIRST: + case BUF_LAST: + break; + default: + /* Word anchors etc. cannot be handled. It's okay to test + opr.ctx_type since constraints (for all DFA nodes) are + created by ORing one or more opr.ctx_type values. */ + return; + } + break; + case OP_PERIOD: + has_period = 1; + break; + case OP_BACK_REF: + case OP_ALT: + case END_OF_RE: + case OP_DUP_ASTERISK: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + break; + case COMPLEX_BRACKET: + return; + case SIMPLE_BRACKET: + /* Just double check. The non-ASCII range starts at 0x80. */ + assert (0x80 % BITSET_WORD_BITS == 0); + for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) + if (dfa->nodes[node].opr.sbcset[i]) + return; + break; + default: + abort (); + } + /* Second pass, only needed if something was noted above: clear + mb_partial on non-ASCII CHARACTER nodes and turn every OP_PERIOD + into OP_UTF8_PERIOD. */ + if (mb_chars || has_period) + for (node = 0; node < dfa->nodes_len; ++node) + { + if (dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].opr.c >= 0x80) + dfa->nodes[node].mb_partial = 0; + else if (dfa->nodes[node].type == OP_PERIOD) + dfa->nodes[node].type = OP_UTF8_PERIOD; + } + + /* The search can be in single byte locale. */ + dfa->mb_cur_max = 1; + dfa->is_utf8 = 0; + dfa->has_mb_node = dfa->nbackref > 0 || has_period; +} +#endif + +/* Analyze the structure tree, and calculate "first", "next", "edest", + "eclosure", and "inveclosure". Returns REG_ESPACE if one of the + arrays cannot be allocated, the error of the first failing pass, or + REG_NOERROR. */ + +static reg_errcode_t +analyze (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + reg_errcode_t ret; + + /* Allocate arrays.
*/ + dfa->nexts = re_malloc (int, dfa->nodes_alloc); + dfa->org_indices = re_malloc (int, dfa->nodes_alloc); + dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); + dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); + if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL + || dfa->eclosures == NULL, 0)) + return REG_ESPACE; + /* Start from the identity subexpression map and let optimize_subexps + redirect entries. The map is dropped again if it is still the + identity afterwards. A failed allocation is not treated as an + error here: the optimization is simply skipped. */ + dfa->subexp_map = re_malloc (int, preg->re_nsub); + if (dfa->subexp_map != NULL) + { + int i; + for (i = 0; i < preg->re_nsub; i++) + dfa->subexp_map[i] = i; + preorder (dfa->str_tree, optimize_subexps, dfa); + for (i = 0; i < preg->re_nsub; i++) + if (dfa->subexp_map[i] != i) + break; + if (i == preg->re_nsub) + { + free (dfa->subexp_map); + dfa->subexp_map = NULL; + } + } + /* Run the tree passes in order: lower the SUBEXP nodes, compute FIRST, + compute NEXT, link the nodes, then compute the epsilon closures. */ + ret = postorder (dfa->str_tree, lower_subexps, preg); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = postorder (dfa->str_tree, calc_first, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + preorder (dfa->str_tree, calc_next, dfa); + ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = calc_eclosure (dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + /* We only need this during the prune_impossible_nodes pass in regexec.c; + skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ + if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) + || dfa->nbackref) + { + dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); + if (BE (dfa->inveclosures == NULL, 0)) + return REG_ESPACE; + ret = calc_inveclosure (dfa); + } + + return ret; +} + +/* Our parse trees are very unbalanced, so we cannot use a stack to + implement parse tree visits. Instead, we use parent pointers and + some hairy code in these two functions.
*/ +static reg_errcode_t +postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node, *prev; + /* Call FN (EXTRA, node) on every node of the tree at ROOT, children + before their parent. Stops at the first error FN returns; returns + REG_NOERROR once the node without a parent has been visited. */ + for (node = root; ; ) + { + /* Descend down the tree, preferably to the left (or to the right + if that's the only child). */ + while (node->left || node->right) + if (node->left) + node = node->left; + else + node = node->right; + + do + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + if (node->parent == NULL) + return REG_NOERROR; + prev = node; + node = node->parent; + } + /* Go up while we have a node that is reached from the right. */ + while (node->right == prev || node->right == NULL); + node = node->right; + } +} + /* Call FN (EXTRA, node) on every node of the tree at ROOT, each node + before its children. Returns the first error FN reports, or + REG_NOERROR when climbing back up runs past a node with no parent. */ +static reg_errcode_t +preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node; + + for (node = root; ; ) + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Go to the left node, or up and to the right. */ + if (node->left) + node = node->left; + else + { + bin_tree_t *prev = NULL; /* Child we climbed up from; NULL on the first step. */ + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + if (!node) + return REG_NOERROR; + } + node = node->right; + } + } +} + +/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell + re_search_internal to map the inner one's opr.idx to this one's. Adjust + backreferences as well. Requires a preorder visit.
*/ +static reg_errcode_t +optimize_subexps (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + /* A back-reference is redirected through the map, and the group it + now names is recorded in used_bkref_map. */ + if (node->token.type == OP_BACK_REF && dfa->subexp_map) + { + int idx = node->token.opr.idx; + node->token.opr.idx = dfa->subexp_map[idx]; + dfa->used_bkref_map |= 1 << node->token.opr.idx; + } + /* A SUBEXP whose left child is itself a SUBEXP: splice the inner node + out of the tree and make its index an alias of the outer one. */ + else if (node->token.type == SUBEXP + && node->left && node->left->token.type == SUBEXP) + { + int other_idx = node->left->token.opr.idx; + + node->left = node->left->left; + if (node->left) + node->left->parent = node; + + dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; + if (other_idx < BITSET_WORD_BITS) + dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); + } + + return REG_NOERROR; +} + +/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation + of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. + The children of NODE are examined, so NODE itself is lowered when its + parent is visited. */ +static reg_errcode_t +lower_subexps (void *extra, bin_tree_t *node) +{ + regex_t *preg = (regex_t *) extra; + reg_errcode_t err = REG_NOERROR; + + if (node->left && node->left->token.type == SUBEXP) + { + node->left = lower_subexp (&err, preg, node->left); + if (node->left) + node->left->parent = node; + } + if (node->right && node->right->token.type == SUBEXP) + { + node->right = lower_subexp (&err, preg, node->right); + if (node->right) + node->right->parent = node; + } + + return err; +} + /* Build the replacement for the SUBEXP node NODE and return it: either + the bare body (see the test below) or a CONCAT of OP_OPEN_SUBEXP with + the body and OP_CLOSE_SUBEXP. Returns NULL and sets *ERR to + REG_ESPACE if a tree node cannot be created. */ +static bin_tree_t * +lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *body = node->left; + bin_tree_t *op, *cls, *tree1, *tree; + + if (preg->no_sub + /* We do not optimize empty subexpressions, because otherwise we may + have bad CONCAT nodes with NULL children. This is obviously not + very common, so we do not lose much. An example that triggers + this case is the sed "script" /\(\)/x.
*/ + && node->left != NULL + && (node->token.opr.idx >= BITSET_WORD_BITS + || !(dfa->used_bkref_map + & ((bitset_word_t) 1 << node->token.opr.idx)))) + return node->left; + + /* Convert the SUBEXP node to the concatenation of an + OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ + op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); + cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); + tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; + tree = create_tree (dfa, op, tree1, CONCAT); + if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + /* Both markers carry the index and opt_subexp flag of the group. */ + op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; + op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; + return tree; +} + +/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton + nodes. Requires a postorder visit. */ +static reg_errcode_t +calc_first (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + if (node->token.type == CONCAT) /* A CONCAT gets no node of its own; it borrows its left child's. */ + { + node->first = node->left->first; + node->node_idx = node->left->node_idx; + } + else + { + node->first = node; + node->node_idx = re_dfa_add_node (dfa, node->token); + if (BE (node->node_idx == -1, 0)) + return REG_ESPACE; + if (node->token.type == ANCHOR) + dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; + } + return REG_NOERROR; +} + +/* Pass 2: compute NEXT on the tree. Preorder visit. */ +static reg_errcode_t +calc_next (void *extra, bin_tree_t *node) +{ + switch (node->token.type) + { + case OP_DUP_ASTERISK: + node->left->next = node; + break; + case CONCAT: + node->left->next = node->right->first; + node->right->next = node->next; + break; + default: + if (node->left) + node->left->next = node->next; + if (node->right) + node->right->next = node->next; + break; + } + return REG_NOERROR; +} + +/* Pass 3: link all DFA nodes to their NEXT node (any order will do).
*/ +static reg_errcode_t +link_nfa_nodes (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + int idx = node->node_idx; + reg_errcode_t err = REG_NOERROR; + + switch (node->token.type) + { + case CONCAT: + break; + + case END_OF_RE: + assert (node->next == NULL); + break; + + case OP_DUP_ASTERISK: + case OP_ALT: + { + int left, right; + dfa->has_plural_match = 1; + if (node->left != NULL) + left = node->left->first->node_idx; + else + left = node->next->node_idx; + if (node->right != NULL) + right = node->right->first->node_idx; + else + right = node->next->node_idx; + assert (left > -1); + assert (right > -1); + err = re_node_set_init_2 (dfa->edests + idx, left, right); + } + break; + + case ANCHOR: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); + break; + + case OP_BACK_REF: + dfa->nexts[idx] = node->next->node_idx; + /* Keep the result: like the other cases, a failed allocation must + reach the caller. */ + if (node->token.type == OP_BACK_REF) + err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); + break; + + default: + assert (!IS_EPSILON_NODE (node->token.type)); + dfa->nexts[idx] = node->next->node_idx; + break; + } + + return err; +} + +/* Duplicate the epsilon closure of the node ROOT_NODE. + Note that duplicated nodes have constraint INIT_CONSTRAINT in addition + to their own constraint. Returns REG_ESPACE if a node cannot be + duplicated or a node set cannot grow, REG_NOERROR otherwise. */ + +static reg_errcode_t +internal_function +duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, + int root_node, unsigned int init_constraint) +{ + int org_node, clone_node, ret; + unsigned int constraint = init_constraint; + for (org_node = top_org_node, clone_node = top_clone_node;;) + { + int org_dest, clone_dest; + if (dfa->nodes[org_node].type == OP_BACK_REF) + { + /* If the back reference epsilon-transit, its destination must + also have the constraint. Then duplicate the epsilon closure + of the destination of the back reference, and store it in + edests of the back reference.
*/ + org_dest = dfa->nexts[org_node]; + re_node_set_empty (dfa->edests + clone_node); + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + dfa->nexts[clone_node] = dfa->nexts[org_node]; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else if (dfa->edests[org_node].nelem == 0) + { + /* In case of the node can't epsilon-transit, don't duplicate the + destination and store the original destination as the + destination of the node. */ + dfa->nexts[clone_node] = dfa->nexts[org_node]; + break; + } + else if (dfa->edests[org_node].nelem == 1) + { + /* In case of the node can epsilon-transit, and it has only one + destination. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* If the node is root_node itself, it means the epsilon closure + has a loop. Then tie it to the destination of the root_node. */ + if (org_node == root_node && clone_node != org_node) + { + ret = re_node_set_insert (dfa->edests + clone_node, org_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + break; + } + /* In case of the node has another constraint, add it. */ + constraint |= dfa->nodes[org_node].constraint; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else /* dfa->edests[org_node].nelem == 2 */ + { + /* In case of the node can epsilon-transit, and it has two + destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* Search for a duplicated node which satisfies the constraint. */ + clone_dest = search_duplicated_node (dfa, org_dest, constraint); + if (clone_dest == -1) + { + /* There is no such duplicated node, create a new one.
*/ + reg_errcode_t err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + err = duplicate_node_closure (dfa, org_dest, clone_dest, + root_node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + { + /* There is a duplicated node which satisfies the constraint, + use it to avoid infinite loop. */ + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + + org_dest = dfa->edests[org_node].elems[1]; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + org_node = org_dest; + clone_node = clone_dest; + } + return REG_NOERROR; +} + +/* Search for a node which is duplicated from the node ORG_NODE, and + satisfies the constraint CONSTRAINT. The scan walks backwards from + the last node and stops at the first node that is not a duplicate. + Returns the index found, or -1. */ + +static int +search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint) +{ + int idx; + /* Test the index before using it, so that the node array is never + read with an index that is not positive. */ + for (idx = dfa->nodes_len - 1; idx > 0 && dfa->nodes[idx].duplicated; --idx) + { + if (org_node == dfa->org_indices[idx] + && constraint == dfa->nodes[idx].constraint) + return idx; /* Found. */ + } + return -1; /* Not found. */ +} + +/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. + Return the index of the new node, or -1 if insufficient storage is + available. */ + +static int +duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint) +{ + int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); + if (BE (dup_idx != -1, 1)) + { + dfa->nodes[dup_idx].constraint = constraint; + dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; + dfa->nodes[dup_idx].duplicated = 1; + + /* Store the index of the original node.
*/ + dfa->org_indices[dup_idx] = org_idx; + } + return dup_idx; +} + +/* Build the inverse of the epsilon closures: afterwards inveclosures[N] + holds every node whose epsilon closure contains N. Returns REG_ESPACE + if a set cannot grow, REG_NOERROR otherwise. */ +static reg_errcode_t +calc_inveclosure (re_dfa_t *dfa) +{ + int src, idx, ret; + for (idx = 0; idx < dfa->nodes_len; ++idx) + re_node_set_init_empty (dfa->inveclosures + idx); + + for (src = 0; src < dfa->nodes_len; ++src) + { + int *elems = dfa->eclosures[src].elems; + for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) + { + ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + } + + return REG_NOERROR; +} + +/* Calculate "eclosure" for all the nodes in DFA. The node list is + scanned again from the start for as long as a scan leaves some + closure incomplete. */ + +static reg_errcode_t +calc_eclosure (re_dfa_t *dfa) +{ + int node_idx, incomplete; +#ifdef DEBUG + assert (dfa->nodes_len > 0); +#endif + incomplete = 0; + /* For each node, calculate epsilon closure. */ + for (node_idx = 0; ; ++node_idx) + { + reg_errcode_t err; + re_node_set eclosure_elem; + if (node_idx == dfa->nodes_len) + { + if (!incomplete) + break; + incomplete = 0; + node_idx = 0; + } + +#ifdef DEBUG + assert (dfa->eclosures[node_idx].nelem != -1); +#endif + + /* If we have already calculated, skip it. */ + if (dfa->eclosures[node_idx].nelem != 0) + continue; + /* Calculate epsilon closure of `node_idx'. */ + err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (dfa->eclosures[node_idx].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + return REG_NOERROR; +} + +/* Calculate epsilon closure of NODE and store it in *NEW_SET. ROOT is + nonzero for the outermost call. A nested call that meets a node whose + closure is still being computed leaves its own entry marked as not yet + calculated, so that calc_eclosure comes back to it. */ + +static reg_errcode_t +calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) +{ + reg_errcode_t err; + int i, incomplete; + re_node_set eclosure; + incomplete = 0; + err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* This indicates that we are calculating this node now. + We reference this value to avoid infinite loop.
*/ + dfa->eclosures[node].nelem = -1; + + /* If the current node has constraints, duplicate all nodes + since they must inherit the constraints. */ + if (dfa->nodes[node].constraint + && dfa->edests[node].nelem + && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) + { + err = duplicate_node_closure (dfa, node, node, node, + dfa->nodes[node].constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Expand each epsilon destination node. */ + if (IS_EPSILON_NODE(dfa->nodes[node].type)) + for (i = 0; i < dfa->edests[node].nelem; ++i) + { + re_node_set eclosure_elem; + int edest = dfa->edests[node].elems[i]; + /* If calculating the epsilon closure of `edest' is in progress, + return intermediate result. */ + if (dfa->eclosures[edest].nelem == -1) + { + incomplete = 1; + continue; + } + /* If we haven't calculated the epsilon closure of `edest' yet, + calculate now. Otherwise use calculated epsilon closure. */ + if (dfa->eclosures[edest].nelem == 0) + { + err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + eclosure_elem = dfa->eclosures[edest]; + /* Merge the epsilon closure of `edest'. A failed merge would + leave the closure silently incomplete, so report it. */ + err = re_node_set_merge (&eclosure, &eclosure_elem); + if (BE (err != REG_NOERROR, 0)) + return err; + /* If the epsilon closure of `edest' is incomplete, + the epsilon closure of this node is also incomplete. */ + if (dfa->eclosures[edest].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + + /* Epsilon closures include itself. */ + if (BE (re_node_set_insert (&eclosure, node) < 0, 0)) + return REG_ESPACE; + if (incomplete && !root) + dfa->eclosures[node].nelem = 0; + else + dfa->eclosures[node] = eclosure; + *new_set = eclosure; + return REG_NOERROR; +} + +/* Functions for token which are used in the parser. */ + +/* Fetch a token from INPUT. + We must not use this function inside bracket expressions.
*/ + +static void +internal_function +fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) +{ /* Peek the next token into RESULT, then advance INPUT past it. */ + re_string_skip_bytes (input, peek_token (result, input, syntax)); +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function inside bracket expressions. + TOKEN receives the token type and operand; SYNTAX selects which + characters and backslash sequences act as operators. */ + +static int +internal_function +peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + /* At the end of the input report END_OF_RE with length 0. */ + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + + c = re_string_peek_byte (input, 0); + token->opr.c = c; + + token->word_char = 0; +#ifdef RE_ENABLE_I18N + token->mb_partial = 0; + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + token->mb_partial = 1; + return 1; + } +#endif + if (c == '\\') + { /* Backslash sequence: two bytes long, unless the backslash is the + last byte of the pattern. */ + unsigned char c2; + if (re_string_cur_idx (input) + 1 >= re_string_length (input)) + { + token->type = BACK_SLASH; + return 1; + } + + c2 = re_string_peek_byte_case (input, 1); + token->opr.c = c2; + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, + re_string_cur_idx (input) + 1); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (c2) != 0; + /* The escaped byte stays a CHARACTER unless SYNTAX enables the + operator it spells. */ + switch (c2) + { + case '|': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (!(syntax & RE_NO_BK_REFS)) + { + token->type = OP_BACK_REF; + token->opr.idx = c2 - '1'; + } + break; + case '<': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_FIRST; + } + break; + case '>': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_LAST; + } + break; + case 'b': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type =
WORD_DELIM; + } + break; + case 'B': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = NOT_WORD_DELIM; + } + break; + case 'w': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_WORD; + break; + case 'W': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTWORD; + break; + case 's': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_SPACE; + break; + case 'S': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTSPACE; + break; + case '`': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_FIRST; + } + break; + case '\'': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_LAST; + } + break; + case '(': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_CLOSE_SUBEXP; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_CLOSE_DUP_NUM; + break; + default: + break; + } + return 2; + } + /* Unescaped byte: an ordinary CHARACTER unless SYNTAX gives it a + special meaning in the switch below. */ + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (token->opr.c); + + switch (c) + { + case '\n': + if (syntax & RE_NEWLINE_ALT) + token->type = OP_ALT; + break; + case '|': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '*': + token->type = OP_DUP_ASTERISK; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && !(syntax &
RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_CLOSE_DUP_NUM; + break; + case '(': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_CLOSE_SUBEXP; + break; + case '[': + token->type = OP_OPEN_BRACKET; + break; + case '.': + token->type = OP_PERIOD; + break; + case '^': + if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && + re_string_cur_idx (input) != 0) + { /* Away from the pattern start the caret is an anchor only right + after a newline, and only with RE_NEWLINE_ALT set. */ + char prev = re_string_peek_byte (input, -1); + if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_FIRST; + break; + case '$': + if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && + re_string_cur_idx (input) + 1 != re_string_length (input)) + { + re_token_t next; /* Look one token ahead and step back again: here the + dollar sign is an anchor only before OP_ALT or OP_CLOSE_SUBEXP. */ + re_string_skip_bytes (input, 1); + peek_token (&next, input, syntax); + re_string_skip_bytes (input, -1); + if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_LAST; + break; + default: + break; + } + return 1; +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function out of bracket expressions.
*/
+
+static int
+internal_function
+peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{ /* Classifies the byte at the current position of INPUT as a token of
+     a bracket expression and stores the result in TOKEN.  Returns 0 at
+     the end of the input, 2 for the two-byte openers "[.", "[=" and
+     (with RE_CHAR_CLASSES) "[:", and 1 otherwise.  */
+  unsigned char c;
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+  c = re_string_peek_byte (input, 0);
+  token->opr.c = c;
+
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1 &&
+      !re_string_first_byte (input, re_string_cur_idx (input)))
+    { /* Presumably a trailing byte of a multibyte character; returned
+         as a plain one-byte CHARACTER.  */
+      token->type = CHARACTER;
+      return 1;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+      && re_string_cur_idx (input) + 1 < re_string_length (input))
+    {
+      /* In this case, '\' escapes a character.  Unlike the other paths,
+         this one advances INPUT: the backslash is skipped here, so the
+         returned length of 1 covers the escaped byte only.  */
+      unsigned char c2;
+      re_string_skip_bytes (input, 1);
+      c2 = re_string_peek_byte (input, 0);
+      token->opr.c = c2;
+      token->type = CHARACTER;
+      return 1;
+    }
+  if (c == '[') /* '[' is a special char in a bracket exps.  */
+    {
+      unsigned char c2;
+      int token_len;
+      if (re_string_cur_idx (input) + 1 < re_string_length (input))
+        c2 = re_string_peek_byte (input, 1);
+      else
+        c2 = 0; /* '[' is the last byte; 0 selects the default case.  */
+      token->opr.c = c2;
+      token_len = 2;
+      switch (c2)
+        {
+        case '.':
+          token->type = OP_OPEN_COLL_ELEM;
+          break;
+        case '=':
+          token->type = OP_OPEN_EQUIV_CLASS;
+          break;
+        case ':':
+          if (syntax & RE_CHAR_CLASSES)
+            {
+              token->type = OP_OPEN_CHAR_CLASS;
+              break;
+            }
+          /* else fall through.  */
+        default:
+          /* Not an opener: report the '[' itself as a CHARACTER.  */
+          token->type = CHARACTER;
+          token->opr.c = c;
+          token_len = 1;
+          break;
+        }
+      return token_len;
+    }
+  switch (c)
+    {
+    case '-':
+      token->type = OP_CHARSET_RANGE;
+      break;
+    case ']':
+      token->type = OP_CLOSE_BRACKET;
+      break;
+    case '^':
+      token->type = OP_NON_MATCH_LIST;
+      break;
+    default:
+      token->type = CHARACTER;
+    }
+  return 1;
+}
+
+/* Functions for parser.  */
+
+/* Entry point of the parser.
+   Parse the regular expression REGEXP and return the structure tree.
+   If an error is occured, ERR is set by error code, and return NULL.
+   This function build the following tree, from regular expression :
+           CAT
+           / \
+          /   \
+     EOR
+
+   CAT means concatenation.
+ EOR means end of regular expression. */ + +static bin_tree_t * +parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, + reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *eor, *root; + re_token_t current_token; + dfa->syntax = syntax; + fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); + tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + eor = create_tree (dfa, NULL, NULL, END_OF_RE); + if (tree != NULL) + root = create_tree (dfa, tree, eor, CONCAT); + else + root = eor; + if (BE (eor == NULL || root == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + return root; +} + +/* This function build the following tree, from regular expression + |: + ALT + / \ + / \ + + + ALT means alternative, which represents the operator `|'. */ + +static bin_tree_t * +parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *branch = NULL; + tree = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type == OP_ALT) + { + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + if (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + branch = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && branch == NULL, 0)) + return NULL; + } + else + branch = NULL; + tree = create_tree (dfa, tree, branch, OP_ALT); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + return tree; +} + +/* This function build the following tree, from regular expression + : + CAT + / \ + / \ + + + CAT means concatenation. 
*/ + +static bin_tree_t * +parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + bin_tree_t *tree, *exp; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + tree = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + exp = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && exp == NULL, 0)) + { + return NULL; + } + if (tree != NULL && exp != NULL) + { + tree = create_tree (dfa, tree, exp, CONCAT); + if (tree == NULL) + { + *err = REG_ESPACE; + return NULL; + } + } + else if (tree == NULL) + tree = exp; + /* Otherwise exp == NULL, we don't need to create new tree. */ + } + return tree; +} + +/* This function build the following tree, from regular expression a*: + * + | + a +*/ + +static bin_tree_t * +parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + switch (token->type) + { + case CHARACTER: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (!re_string_eoi (regexp) + && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) + { + bin_tree_t *mbc_remain; + fetch_token (token, regexp, syntax); + mbc_remain = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree, mbc_remain, CONCAT); + if (BE (mbc_remain == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + } +#endif + break; + case OP_OPEN_SUBEXP: + tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case 
OP_OPEN_BRACKET: + tree = parse_bracket_exp (regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_BACK_REF: + if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) + { + *err = REG_ESUBREG; + return NULL; + } + dfa->used_bkref_map |= 1 << token->opr.idx; + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + ++dfa->nbackref; + dfa->has_mb_node = 1; + break; + case OP_OPEN_DUP_NUM: + if (syntax & RE_CONTEXT_INVALID_DUP) + { + *err = REG_BADRPT; + return NULL; + } + /* FALLTHROUGH */ + case OP_DUP_ASTERISK: + case OP_DUP_PLUS: + case OP_DUP_QUESTION: + if (syntax & RE_CONTEXT_INVALID_OPS) + { + *err = REG_BADRPT; + return NULL; + } + else if (syntax & RE_CONTEXT_INDEP_OPS) + { + fetch_token (token, regexp, syntax); + return parse_expression (regexp, preg, token, syntax, nest, err); + } + /* else fall through */ + case OP_CLOSE_SUBEXP: + if ((token->type == OP_CLOSE_SUBEXP) && + !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) + { + *err = REG_ERPAREN; + return NULL; + } + /* else fall through */ + case OP_CLOSE_DUP_NUM: + /* We treat it as a normal character. */ + + /* Then we can these characters as normal characters. */ + token->type = CHARACTER; + /* mb_partial and word_char bits should be initialized already + by peek_token. 
*/ + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + break; + case ANCHOR: + if ((token->opr.ctx_type + & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) + && dfa->word_ops_used == 0) + init_word_char (dfa); + if (token->opr.ctx_type == WORD_DELIM + || token->opr.ctx_type == NOT_WORD_DELIM) + { + bin_tree_t *tree_first, *tree_last; + if (token->opr.ctx_type == WORD_DELIM) + { + token->opr.ctx_type = WORD_FIRST; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = WORD_LAST; + } + else + { + token->opr.ctx_type = INSIDE_WORD; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = INSIDE_NOTWORD; + } + tree_last = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree_first, tree_last, OP_ALT); + if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + else + { + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + /* We must return here, since ANCHORs can't be followed + by repetition operators. + eg. RE"^*" is invalid or "", + it must not be "". 
*/ + fetch_token (token, regexp, syntax); + return tree; + case OP_PERIOD: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + if (dfa->mb_cur_max > 1) + dfa->has_mb_node = 1; + break; + case OP_WORD: + case OP_NOTWORD: + tree = build_charclass_op (dfa, regexp->trans, + (const unsigned char *) "alnum", + (const unsigned char *) "_", + token->type == OP_NOTWORD, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_SPACE: + case OP_NOTSPACE: + tree = build_charclass_op (dfa, regexp->trans, + (const unsigned char *) "space", + (const unsigned char *) "", + token->type == OP_NOTSPACE, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_ALT: + case END_OF_RE: + return NULL; + case BACK_SLASH: + *err = REG_EESCAPE; + return NULL; + default: + /* Must not happen? */ +#ifdef DEBUG + assert (0); +#endif + return NULL; + } + fetch_token (token, regexp, syntax); + + while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS + || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) + { + tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + /* In BRE consecutive duplications are not allowed. 
*/ + if ((syntax & RE_CONTEXT_INVALID_DUP) + && (token->type == OP_DUP_ASTERISK + || token->type == OP_OPEN_DUP_NUM)) + { + *err = REG_BADRPT; + return NULL; + } + } + + return tree; +} + +/* This function build the following tree, from regular expression + (): + SUBEXP + | + +*/ + +static bin_tree_t * +parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + size_t cur_nsub; + cur_nsub = preg->re_nsub++; + + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + + /* The subexpression may be a null string. */ + if (token->type == OP_CLOSE_SUBEXP) + tree = NULL; + else + { + tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); + if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) + *err = REG_EPAREN; + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + + if (cur_nsub <= '9' - '1') + dfa->completed_bkref_map |= 1 << cur_nsub; + + tree = create_tree (dfa, tree, NULL, SUBEXP); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + tree->token.opr.idx = cur_nsub; + return tree; +} + +/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ + +static bin_tree_t * +parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) +{ + bin_tree_t *tree = NULL, *old_tree = NULL; + int i, start, end, start_idx = re_string_cur_idx (regexp); + re_token_t start_token = *token; + + if (token->type == OP_OPEN_DUP_NUM) + { + end = 0; + start = fetch_number (regexp, token, syntax); + if (start == -1) + { + if (token->type == CHARACTER && token->opr.c == ',') + start = 0; /* We treat "{,m}" as "{0,m}". */ + else + { + *err = REG_BADBR; /* {} is invalid. */ + return NULL; + } + } + if (BE (start != -2, 1)) + { + /* We treat "{n}" as "{n,n}". */ + end = ((token->type == OP_CLOSE_DUP_NUM) ? 
start + : ((token->type == CHARACTER && token->opr.c == ',') + ? fetch_number (regexp, token, syntax) : -2)); + } + if (BE (start == -2 || end == -2, 0)) + { + /* Invalid sequence. */ + if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) + { + if (token->type == END_OF_RE) + *err = REG_EBRACE; + else + *err = REG_BADBR; + + return NULL; + } + + /* If the syntax bit is set, rollback. */ + re_string_set_index (regexp, start_idx); + *token = start_token; + token->type = CHARACTER; + /* mb_partial and word_char bits should be already initialized by + peek_token. */ + return elem; + } + + if (BE (end != -1 && start > end, 0)) + { + /* First number greater than second. */ + *err = REG_BADBR; + return NULL; + } + } + else + { + start = (token->type == OP_DUP_PLUS) ? 1 : 0; + end = (token->type == OP_DUP_QUESTION) ? 1 : -1; + } + + fetch_token (token, regexp, syntax); + + if (BE (elem == NULL, 0)) + return NULL; + if (BE (start == 0 && end == 0, 0)) + { + postorder (elem, free_tree, NULL); + return NULL; + } + + /* Extract "{n,m}" to "...{0,}". */ + if (BE (start > 0, 0)) + { + tree = elem; + for (i = 2; i <= start; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (start == end) + return tree; + + /* Duplicate ELEM before it is marked optional. */ + elem = duplicate_tree (elem, dfa); + old_tree = tree; + } + else + old_tree = NULL; + + if (elem->token.type == SUBEXP) + postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); + + tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + + /* This loop is actually executed only when end != -1, + to rewrite {0,n} as ((...?)?)?... We have + already created the start+1-th copy. 
*/ + for (i = start + 2; i <= end; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + + tree = create_tree (dfa, tree, NULL, OP_ALT); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (old_tree) + tree = create_tree (dfa, old_tree, tree, CONCAT); + + return tree; + + parse_dup_op_espace: + *err = REG_ESPACE; + return NULL; +} + +/* Size of the names for collating symbol/equivalence_class/character_class. + I'm not sure, but maybe enough. */ +#define BRACKET_NAME_BUF_SIZE 32 + +#ifndef _LIBC + /* Local function for parse_bracket_exp only used in case of NOT _LIBC. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. + RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) +# else /* not RE_ENABLE_I18N */ +build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, + bracket_elem_t *end_elem) +# endif /* not RE_ENABLE_I18N */ +{ + unsigned int start_ch, end_ch; + /* Equivalence Classes and Character Classes can't be a range start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + /* We can handle no multi character collating elements without libc + support. 
*/ + if (BE ((start_elem->type == COLL_SYM + && strlen ((char *) start_elem->opr.name) > 1) + || (end_elem->type == COLL_SYM + && strlen ((char *) end_elem->opr.name) > 1), 0)) + return REG_ECOLLATE; + +# ifdef RE_ENABLE_I18N + { + wchar_t wc; + wint_t start_wc; + wint_t end_wc; + wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + + start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? __btowc (start_ch) : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? __btowc (end_ch) : end_elem->opr.wch); + if (start_wc == WEOF || end_wc == WEOF) + return REG_ECOLLATE; + cmp_buf[0] = start_wc; + cmp_buf[4] = end_wc; + if (wcscoll (cmp_buf, cmp_buf + 4) > 0) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, for !_LIBC we have no collation elements: if the + character set is single byte, the single byte character set + that we build below suffices. parse_bracket_exp passes + no MBCSET if dfa->mb_cur_max == 1. */ + if (mbcset) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + wchar_t *new_array_start, *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. 
*/ + new_array_start = re_realloc (mbcset->range_starts, wchar_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, wchar_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_wc; + mbcset->range_ends[mbcset->nranges++] = end_wc; + } + + /* Build the table for single byte characters. */ + for (wc = 0; wc < SBC_MAX; ++wc) + { + cmp_buf[2] = wc; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + bitset_set (sbcset, wc); + } + } +# else /* not RE_ENABLE_I18N */ + { + unsigned int ch; + start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + if (start_ch > end_ch) + return REG_ERANGE; + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ++ch) + if (start_ch <= ch && ch <= end_ch) + bitset_set (sbcset, ch); + } +# endif /* not RE_ENABLE_I18N */ + return REG_NOERROR; +} +#endif /* not _LIBC */ + +#ifndef _LIBC +/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument since we may update it. 
*/ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + int *coll_sym_alloc, const unsigned char *name) +# else /* not RE_ENABLE_I18N */ +build_collating_symbol (bitset_t sbcset, const unsigned char *name) +# endif /* not RE_ENABLE_I18N */ +{ + size_t name_len = strlen ((const char *) name); + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } +} +#endif /* not _LIBC */ + +/* This function parse bracket expression like "[abc]", "[a-c]", + "[[.a-a.]]" etc. */ + +static bin_tree_t * +parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err) +{ +#ifdef _LIBC + const unsigned char *collseqmb; + const char *collseqwc; + uint32_t nrules; + int32_t table_size; + const int32_t *symb_table; + const unsigned char *extra; + + /* Local function for parse_bracket_exp used in _LIBC environement. + Seek the collating symbol entry correspondings to NAME. + Return the index of the symbol in the SYMB_TABLE. */ + + auto inline int32_t + __attribute ((always_inline)) + seek_collating_symbol_entry (name, name_len) + const unsigned char *name; + size_t name_len; + { + int32_t hash = elem_hash ((const char *) name, name_len); + int32_t elem = hash % table_size; + if (symb_table[2 * elem] != 0) + { + int32_t second = hash % (table_size - 2) + 1; + + do + { + /* First compare the hashing value. */ + if (symb_table[2 * elem] == hash + /* Compare the length of the name. */ + && name_len == extra[symb_table[2 * elem + 1]] + /* Compare the name. */ + && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], + name_len) == 0) + { + /* Yep, this is the entry. */ + break; + } + + /* Next entry. */ + elem += second; + } + while (symb_table[2 * elem] != 0); + } + return elem; + } + + /* Local function for parse_bracket_exp used in _LIBC environment. 
+ Look up the collation sequence value of BR_ELEM. + Return the value if succeeded, UINT_MAX otherwise. */ + + auto inline unsigned int + __attribute ((always_inline)) + lookup_collation_sequence_value (br_elem) + bracket_elem_t *br_elem; + { + if (br_elem->type == SB_CHAR) + { + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + return collseqmb[br_elem->opr.ch]; + else + { + wint_t wc = __btowc (br_elem->opr.ch); + return __collseq_table_lookup (collseqwc, wc); + } + } + else if (br_elem->type == MB_CHAR) + { + if (nrules != 0) + return __collseq_table_lookup (collseqwc, br_elem->opr.wch); + } + else if (br_elem->type == COLL_SYM) + { + size_t sym_name_len = strlen ((char *) br_elem->opr.name); + if (nrules != 0) + { + int32_t elem, idx; + elem = seek_collating_symbol_entry (br_elem->opr.name, + sym_name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + /* Skip the byte sequence of the collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the multibyte collation sequence value. */ + idx += sizeof (unsigned int); + /* Skip the wide char sequence of the collating element. */ + idx += sizeof (unsigned int) * + (1 + *(unsigned int *) (extra + idx)); + /* Return the collation sequence value. */ + return *(unsigned int *) (extra + idx); + } + else if (symb_table[2 * elem] == 0 && sym_name_len == 1) + { + /* No valid character. Match it as a single byte + character. */ + return collseqmb[br_elem->opr.name[0]]; + } + } + else if (sym_name_len == 1) + return collseqmb[br_elem->opr.name[0]]; + } + return UINT_MAX; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. 
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) + re_charset_t *mbcset; + int *range_alloc; + bitset_t sbcset; + bracket_elem_t *start_elem, *end_elem; + { + unsigned int ch; + uint32_t start_collseq; + uint32_t end_collseq; + + /* Equivalence Classes and Character Classes can't be a range + start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + start_collseq = lookup_collation_sequence_value (start_elem); + end_collseq = lookup_collation_sequence_value (end_elem); + /* Check start/end collation sequence values. */ + if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) + return REG_ECOLLATE; + if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, if we have no collation elements, and the character set + is single byte, the single byte character set that we + build below suffices. */ + if (nrules > 0 || dfa->mb_cur_max > 1) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + uint32_t *new_array_start; + uint32_t *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. 
*/ + new_nranges = 2 * mbcset->nranges + 1; + new_array_start = re_realloc (mbcset->range_starts, uint32_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, uint32_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_collseq; + mbcset->range_ends[mbcset->nranges++] = end_collseq; + } + + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ch++) + { + uint32_t ch_collseq; + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + ch_collseq = collseqmb[ch]; + else + ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); + if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) + bitset_set (sbcset, ch); + } + return REG_NOERROR; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument sinse we may update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) + re_charset_t *mbcset; + int *coll_sym_alloc; + bitset_t sbcset; + const unsigned char *name; + { + int32_t elem, idx; + size_t name_len = strlen ((const char *) name); + if (nrules != 0) + { + elem = seek_collating_symbol_entry (name, name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + } + else if (symb_table[2 * elem] == 0 && name_len == 1) + { + /* No valid character, treat it as a normal + character. 
*/ + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + else + return REG_ECOLLATE; + + /* Got valid collation sequence, add it as a new entry. */ + /* Check the space of the arrays. */ + if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->ncoll_syms is 0. */ + int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; + /* Use realloc since mbcset->coll_syms is NULL + if *alloc == 0. */ + int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, + new_coll_sym_alloc); + if (BE (new_coll_syms == NULL, 0)) + return REG_ESPACE; + mbcset->coll_syms = new_coll_syms; + *coll_sym_alloc = new_coll_sym_alloc; + } + mbcset->coll_syms[mbcset->ncoll_syms++] = idx; + return REG_NOERROR; + } + else + { + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + } + } +#endif + + re_token_t br_token; + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; + int equiv_class_alloc = 0, char_class_alloc = 0; +#endif /* not RE_ENABLE_I18N */ + int non_match = 0; + bin_tree_t *work_tree; + int token_len; + int first_round = 1; +#ifdef _LIBC + collseqmb = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules) + { + /* + if (MB_CUR_MAX > 1) + */ + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); + symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_TABLEMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_EXTRAMB); + } +#endif + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL 
|| mbcset == NULL, 0)) +#else + if (BE (sbcset == NULL, 0)) +#endif /* RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_NON_MATCH_LIST) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + non_match = 1; + if (syntax & RE_HAT_LISTS_NOT_NEWLINE) + bitset_set (sbcset, '\n'); + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + } + + /* We treat the first ']' as a normal character. */ + if (token->type == OP_CLOSE_BRACKET) + token->type = CHARACTER; + + while (1) + { + bracket_elem_t start_elem, end_elem; + unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; + unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; + reg_errcode_t ret; + int token_len2 = 0, is_range_exp = 0; + re_token_t token2; + + start_elem.opr.name = start_name_buf; + ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, + syntax, first_round); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + first_round = 0; + + /* Get information about the next token. We need it in any case. */ + token_len = peek_token_bracket (token, regexp, syntax); + + /* Do not check for ranges if we know they are not allowed. */ + if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) + { + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CHARSET_RANGE) + { + re_string_skip_bytes (regexp, token_len); /* Skip '-'. 
*/ + token_len2 = peek_token_bracket (&token2, regexp, syntax); + if (BE (token2.type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token2.type == OP_CLOSE_BRACKET) + { + /* We treat the last '-' as a normal character. */ + re_string_skip_bytes (regexp, -token_len); + token->type = CHARACTER; + } + else + is_range_exp = 1; + } + } + + if (is_range_exp == 1) + { + end_elem.opr.name = end_name_buf; + ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, + dfa, syntax, 1); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + + token_len = peek_token_bracket (token, regexp, syntax); + +#ifdef _LIBC + *err = build_range_exp (sbcset, mbcset, &range_alloc, + &start_elem, &end_elem); +#else +# ifdef RE_ENABLE_I18N + *err = build_range_exp (sbcset, + dfa->mb_cur_max > 1 ? mbcset : NULL, + &range_alloc, &start_elem, &end_elem); +# else + *err = build_range_exp (sbcset, &start_elem, &end_elem); +# endif +#endif /* RE_ENABLE_I18N */ + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + } + else + { + switch (start_elem.type) + { + case SB_CHAR: + bitset_set (sbcset, start_elem.opr.ch); + break; +#ifdef RE_ENABLE_I18N + case MB_CHAR: + /* Check whether the array has enough space. */ + if (BE (mbchar_alloc == mbcset->nmbchars, 0)) + { + wchar_t *new_mbchars; + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nmbchars is 0. */ + mbchar_alloc = 2 * mbcset->nmbchars + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + new_mbchars = re_realloc (mbcset->mbchars, wchar_t, + mbchar_alloc); + if (BE (new_mbchars == NULL, 0)) + goto parse_bracket_exp_espace; + mbcset->mbchars = new_mbchars; + } + mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; + break; +#endif /* RE_ENABLE_I18N */ + case EQUIV_CLASS: + *err = build_equiv_class (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &equiv_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case COLL_SYM: + *err = build_collating_symbol (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &coll_sym_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case CHAR_CLASS: + *err = build_charclass (regexp->trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &char_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name, syntax); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + default: + assert (0); + break; + } + } + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CLOSE_BRACKET) + break; + } + + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); + + if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes + || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes + || mbcset->non_match))) + { + bin_tree_t *mbc_tree; + int sbc_idx; + /* Build a tree for complex bracket. 
*/ + dfa->has_mb_node = 1; + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto parse_bracket_exp_espace; + for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) + if (sbcset[sbc_idx]) + break; + /* If there are no bits set in sbcset, there is no point + of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ + if (sbc_idx < BITSET_WORDS) + { + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + + /* Then join them by ALT node. */ + work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + else + { + re_free (sbcset); + work_tree = mbc_tree; + } + } + else +#endif /* not RE_ENABLE_I18N */ + { +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + return work_tree; + + parse_bracket_exp_espace: + *err = REG_ESPACE; + parse_bracket_exp_free_return: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + return NULL; +} + +/* Parse an element in the bracket expression. 
*/ + +static reg_errcode_t +parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token, int token_len, re_dfa_t *dfa, + reg_syntax_t syntax, int accept_hyphen) +{ +#ifdef RE_ENABLE_I18N + int cur_char_size; + cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); + if (cur_char_size > 1) + { + elem->type = MB_CHAR; + elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); + re_string_skip_bytes (regexp, cur_char_size); + return REG_NOERROR; + } +#endif /* RE_ENABLE_I18N */ + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS + || token->type == OP_OPEN_EQUIV_CLASS) + return parse_bracket_symbol (elem, regexp, token); + if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) + { + /* A '-' must only appear as anything but a range indicator before + the closing bracket. Everything else is an error. */ + re_token_t token2; + (void) peek_token_bracket (&token2, regexp, syntax); + if (token2.type != OP_CLOSE_BRACKET) + /* The actual error value is not standardized since this whole + case is undefined. But ERANGE makes good sense. */ + return REG_ERANGE; + } + elem->type = SB_CHAR; + elem->opr.ch = token->opr.c; + return REG_NOERROR; +} + +/* Parse a bracket symbol in the bracket expression. Bracket symbols are + such as [::], [..], and + [==]. 
*/ + +static reg_errcode_t +parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token) +{ + unsigned char ch, delim = token->opr.c; + int i = 0; + if (re_string_eoi(regexp)) + return REG_EBRACK; + for (;; ++i) + { + if (i >= BRACKET_NAME_BUF_SIZE) + return REG_EBRACK; + if (token->type == OP_OPEN_CHAR_CLASS) + ch = re_string_fetch_byte_case (regexp); + else + ch = re_string_fetch_byte (regexp); + if (re_string_eoi(regexp)) + return REG_EBRACK; + if (ch == delim && re_string_peek_byte (regexp, 0) == ']') + break; + elem->opr.name[i] = ch; + } + re_string_skip_bytes (regexp, 1); + elem->opr.name[i] = '\0'; + switch (token->type) + { + case OP_OPEN_COLL_ELEM: + elem->type = COLL_SYM; + break; + case OP_OPEN_EQUIV_CLASS: + elem->type = EQUIV_CLASS; + break; + case OP_OPEN_CHAR_CLASS: + elem->type = CHAR_CLASS; + break; + default: + break; + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the equivalence class which is represented by NAME. + The result are written to MBCSET and SBCSET. + EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, + int *equiv_class_alloc, const unsigned char *name) +#else /* not RE_ENABLE_I18N */ +build_equiv_class (bitset_t sbcset, const unsigned char *name) +#endif /* not RE_ENABLE_I18N */ +{ +#ifdef _LIBC + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + const int32_t *table, *indirect; + const unsigned char *weights, *extra, *cp; + unsigned char char_buf[2]; + int32_t idx1, idx2; + unsigned int ch; + size_t len; + /* This #include defines a local function! */ +# include + /* Calculate the index for equivalence class. 
*/ + cp = name; + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + idx1 = findidx (&cp); + if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) + /* This isn't a valid character. */ + return REG_ECOLLATE; + + /* Build single byte matcing table for this equivalence class. */ + char_buf[1] = (unsigned char) '\0'; + len = weights[idx1 & 0xffffff]; + for (ch = 0; ch < SBC_MAX; ++ch) + { + char_buf[0] = ch; + cp = char_buf; + idx2 = findidx (&cp); +/* + idx2 = table[ch]; +*/ + if (idx2 == 0) + /* This isn't a valid character. */ + continue; + /* Compare only if the length matches and the collation rule + index is the same. */ + if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)) + { + int cnt = 0; + + while (cnt <= len && + weights[(idx1 & 0xffffff) + 1 + cnt] + == weights[(idx2 & 0xffffff) + 1 + cnt]) + ++cnt; + + if (cnt > len) + bitset_set (sbcset, ch); + } + } + /* Check whether the array has enough space. */ + if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nequiv_classes is 0. */ + int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; + /* Use realloc since the array is NULL if *alloc == 0. 
*/ + int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, + int32_t, + new_equiv_class_alloc); + if (BE (new_equiv_classes == NULL, 0)) + return REG_ESPACE; + mbcset->equiv_classes = new_equiv_classes; + *equiv_class_alloc = new_equiv_class_alloc; + } + mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; + } + else +#endif /* _LIBC */ + { + if (BE (strlen ((const char *) name) != 1, 0)) + return REG_ECOLLATE; + bitset_set (sbcset, *name); + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the character class which is represented by NAME. + The result are written to MBCSET and SBCSET. + CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + re_charset_t *mbcset, int *char_class_alloc, + const unsigned char *class_name, reg_syntax_t syntax) +#else /* not RE_ENABLE_I18N */ +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + const unsigned char *class_name, reg_syntax_t syntax) +#endif /* not RE_ENABLE_I18N */ +{ + int i; + const char *name = (const char *) class_name; + + /* In case of REG_ICASE "upper" and "lower" match the both of + upper and lower cases. */ + if ((syntax & RE_ICASE) + && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) + name = "alpha"; + +#ifdef RE_ENABLE_I18N + /* Check the space of the arrays. */ + if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nchar_classes is 0. */ + int new_char_class_alloc = 2 * mbcset->nchar_classes + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, + new_char_class_alloc); + if (BE (new_char_classes == NULL, 0)) + return REG_ESPACE; + mbcset->char_classes = new_char_classes; + *char_class_alloc = new_char_class_alloc; + } + mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); +#endif /* RE_ENABLE_I18N */ + +#define BUILD_CHARCLASS_LOOP(ctype_func) \ + do { \ + if (BE (trans != NULL, 0)) \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, trans[i]); \ + } \ + else \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, i); \ + } \ + } while (0) + + if (strcmp (name, "alnum") == 0) + BUILD_CHARCLASS_LOOP (isalnum); + else if (strcmp (name, "cntrl") == 0) + BUILD_CHARCLASS_LOOP (iscntrl); + else if (strcmp (name, "lower") == 0) + BUILD_CHARCLASS_LOOP (islower); + else if (strcmp (name, "space") == 0) + BUILD_CHARCLASS_LOOP (isspace); + else if (strcmp (name, "alpha") == 0) + BUILD_CHARCLASS_LOOP (isalpha); + else if (strcmp (name, "digit") == 0) + BUILD_CHARCLASS_LOOP (isdigit); + else if (strcmp (name, "print") == 0) + BUILD_CHARCLASS_LOOP (isprint); + else if (strcmp (name, "upper") == 0) + BUILD_CHARCLASS_LOOP (isupper); + else if (strcmp (name, "blank") == 0) + BUILD_CHARCLASS_LOOP (isblank); + else if (strcmp (name, "graph") == 0) + BUILD_CHARCLASS_LOOP (isgraph); + else if (strcmp (name, "punct") == 0) + BUILD_CHARCLASS_LOOP (ispunct); + else if (strcmp (name, "xdigit") == 0) + BUILD_CHARCLASS_LOOP (isxdigit); + else + return REG_ECTYPE; + + return REG_NOERROR; +} + +static bin_tree_t * +build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, + const unsigned char *class_name, + const unsigned char *extra, int non_match, + reg_errcode_t *err) +{ + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int alloc = 0; +#endif /* not RE_ENABLE_I18N */ + reg_errcode_t ret; + re_token_t br_token; + bin_tree_t *tree; + + sbcset = 
(re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ + +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else /* not RE_ENABLE_I18N */ + if (BE (sbcset == NULL, 0)) +#endif /* not RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + if (non_match) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + } + + /* We don't care the syntax in this case. */ + ret = build_charclass (trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &alloc, +#endif /* RE_ENABLE_I18N */ + class_name, 0); + + if (BE (ret != REG_NOERROR, 0)) + { + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = ret; + return NULL; + } + /* \w match '_' also. */ + for (; *extra; extra++) + bitset_set (sbcset, *extra); + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); +#endif + + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (tree == NULL, 0)) + goto build_word_op_espace; + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + bin_tree_t *mbc_tree; + /* Build a tree for complex bracket. */ + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + dfa->has_mb_node = 1; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto build_word_op_espace; + /* Then join them by ALT node. 
*/ + tree = create_tree (dfa, tree, mbc_tree, OP_ALT); + if (BE (mbc_tree != NULL, 1)) + return tree; + } + else + { + free_charset (mbcset); + return tree; + } +#else /* not RE_ENABLE_I18N */ + return tree; +#endif /* not RE_ENABLE_I18N */ + + build_word_op_espace: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = REG_ESPACE; + return NULL; +} + +/* This is intended for the expressions like "a{1,3}". + Fetch a number from `input', and return the number. + Return -1, if the number field is empty like "{,1}". + Return -2, If an error is occured. */ + +static int +fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) +{ + int num = -1; + unsigned char c; + while (1) + { + fetch_token (token, input, syntax); + c = token->opr.c; + if (BE (token->type == END_OF_RE, 0)) + return -2; + if (token->type == OP_CLOSE_DUP_NUM || c == ',') + break; + num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) + ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); + num = (num > RE_DUP_MAX) ? -2 : num; + } + return num; +} + +#ifdef RE_ENABLE_I18N +static void +free_charset (re_charset_t *cset) +{ + re_free (cset->mbchars); +# ifdef _LIBC + re_free (cset->coll_syms); + re_free (cset->equiv_classes); + re_free (cset->range_starts); + re_free (cset->range_ends); +# endif + re_free (cset->char_classes); + re_free (cset); +} +#endif /* RE_ENABLE_I18N */ + +/* Functions for binary tree operation. */ + +/* Create a tree node. 
*/ + +static bin_tree_t * +create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + re_token_type_t type) +{ + re_token_t t; + t.type = type; + return create_token_tree (dfa, left, right, &t); +} + +static bin_tree_t * +create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + const re_token_t *token) +{ + bin_tree_t *tree; + if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) + { + bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); + + if (storage == NULL) + return NULL; + storage->next = dfa->str_tree_storage; + dfa->str_tree_storage = storage; + dfa->str_tree_storage_idx = 0; + } + tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++]; + + tree->parent = NULL; + tree->left = left; + tree->right = right; + tree->token = *token; + tree->token.duplicated = 0; + tree->token.opt_subexp = 0; + tree->first = NULL; + tree->next = NULL; + tree->node_idx = -1; + + if (left != NULL) + left->parent = tree; + if (right != NULL) + right->parent = tree; + return tree; +} + +/* Mark the tree SRC as an optional subexpression. + To be called from preorder or postorder. */ + +static reg_errcode_t +mark_opt_subexp (void *extra, bin_tree_t *node) +{ + int idx = (int) (long) extra; + if (node->token.type == SUBEXP && node->token.opr.idx == idx) + node->token.opt_subexp = 1; + + return REG_NOERROR; +} + +/* Free the allocated memory inside NODE. */ + +static void +free_token (re_token_t *node) +{ +#ifdef RE_ENABLE_I18N + if (node->type == COMPLEX_BRACKET && node->duplicated == 0) + free_charset (node->opr.mbcset); + else +#endif /* RE_ENABLE_I18N */ + if (node->type == SIMPLE_BRACKET && node->duplicated == 0) + re_free (node->opr.sbcset); +} + +/* Worker function for tree walking. Free the allocated memory inside NODE + and its children. */ + +static reg_errcode_t +free_tree (void *extra, bin_tree_t *node) +{ + free_token (&node->token); + return REG_NOERROR; +} + + +/* Duplicate the node SRC, and return new node. 
This is a preorder
+ visit similar to the one implemented by the generic visitor, but
+ we need more infrastructure to maintain two parallel trees --- so,
+ it's easier to duplicate. */
+
+static bin_tree_t *
+duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
+{
+ const bin_tree_t *node; /* Current node in the source tree. */
+ bin_tree_t *dup_root; /* Root of the copy; this is the return value. */
+ bin_tree_t **p_new = &dup_root, *dup_node = root->parent; /* P_NEW is the
+    slot that receives the next copied node, DUP_NODE the parent it is
+    given.  NOTE(review): the copy's root gets ROOT->parent as parent and
+    the upward walk below only stops at a NULL parent, so this looks like
+    it expects ROOT->parent to be NULL -- confirm against the callers. */
+
+ for (node = root; ; )
+ {
+ /* Create a new tree and link it back to the current parent.
+    The copy is flagged as duplicated; free_token skips the character
+    sets of tokens carrying that flag. */
+ *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
+ if (*p_new == NULL)
+ return NULL; /* Allocation failed; nodes copied so far are not undone here. */
+ (*p_new)->parent = dup_node;
+ (*p_new)->token.duplicated = 1;
+ dup_node = *p_new;
+
+ /* Go to the left node, or up and to the right. */
+ if (node->left)
+ {
+ node = node->left;
+ p_new = &dup_node->left;
+ }
+ else
+ {
+ const bin_tree_t *prev = NULL; /* Child we are climbing out of. */
+ while (node->right == prev || node->right == NULL)
+ {
+ prev = node;
+ node = node->parent; /* Both trees climb in lockstep. */
+ dup_node = dup_node->parent;
+ if (!node)
+ return dup_root; /* Climbed past the top: every node is copied. */
+ }
+ node = node->right;
+ p_new = &dup_node->right;
+ }
+ }
+}
diff --git a/gnu_regex/regex.c b/gnu_regex/regex.c
new file mode 100644
index 0000000..bec9f9d
--- /dev/null
+++ b/gnu_regex/regex.c
@@ -0,0 +1,74 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa .
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* Make sure noone compiles this code with a C++ compiler. */ +#ifdef __cplusplus +# error "This is C code, use a C compiler" +#endif + +#ifdef _LIBC +/* We have to keep the namespace clean. */ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +# include "../locale/localeinfo.h" +#endif + +/* On some systems, limits.h sets RE_DUP_MAX to a lower value than + GNU regex allows. Include it before , which correctly + #undefs RE_DUP_MAX and sets it to the right value. */ +#include + +#include "regex.h" +#include "regex_internal.h" + +#include "regex_internal.c" +#include "regcomp.c" +#include "regexec.c" + +/* Binary backward compatibility. 
*/ +#if _LIBC +# include +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) +link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.") +int re_max_failures = 2000; +# endif +#endif diff --git a/gnu_regex/regex.h b/gnu_regex/regex.h new file mode 100644 index 0000000..2132772 --- /dev/null +++ b/gnu_regex/regex.h @@ -0,0 +1,575 @@ +/* Definitions for data structures and routines for the regular + expression library. + Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_H +#define _REGEX_H 1 + +#include + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. 
The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned long int reg_syntax_t; + +#ifdef __USE_GNU +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? 
and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+# define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal.
*/ +# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, turn on internal regex debugging. + If not set, and debugging was on, turn it off. + This only works if regex.c is compiled -DDEBUG. + We define this bit always, so that all that's needed to turn on + debugging is to recompile regex.c; the calling code can always have + this bit set, and it won't affect anything in the normal case. */ +# define RE_DEBUG (RE_NO_GNU_OPS << 1) + +/* If this bit is set, a syntactically invalid interval is treated as + a string of ordinary characters. For example, the ERE 'a{1' is + treated as 'a\{1'. */ +# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) + +/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only + for ^, because it is difficult to scan the regex backwards to find + whether ^ should be special. */ +# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) + +/* If this bit is set, then \{ cannot be first in an bre or + immediately after an alternation or begin-group operator. 
*/ +# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) + +/* If this bit is set, then no_sub will be set to 1 during + re_compile_pattern. */ +# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) +#endif + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +#ifdef __USE_GNU +/* Define combinations of the above bits for the standard possibilities. + (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_INVALID_INTERVAL_ORD) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. 
*/ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is + removed and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +# ifdef RE_DUP_MAX +# undef RE_DUP_MAX +# endif +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +# define RE_DUP_MAX (0x7fff) +#endif + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. 
*/ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). + If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + +/* Use PMATCH[0] to delimit the start and end of the search in the + buffer. */ +#define REG_STARTEND (1 << 2) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ +#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Invalid collating element. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern.
Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +#ifndef RE_TRANSLATE_TYPE +# define __RE_TRANSLATE_TYPE unsigned char * +# ifdef __USE_GNU +# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE +# endif +#endif + +#ifdef __USE_GNU +# define __REPB_PREFIX(name) name +#else +# define __REPB_PREFIX(name) __##name +#endif + +struct re_pattern_buffer +{ + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are sometimes used as + array indexes. */ + unsigned char *__REPB_PREFIX(buffer); + + /* Number of bytes to which `buffer' points. */ + unsigned long int __REPB_PREFIX(allocated); + + /* Number of bytes actually used in `buffer'. */ + unsigned long int __REPB_PREFIX(used); + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t __REPB_PREFIX(syntax); + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses the + fastmap, if there is one, to skip over impossible starting points + for matches. */ + char *__REPB_PREFIX(fastmap); + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation is + applied to a pattern when it is compiled and to a string when it + is matched. */ + __RE_TRANSLATE_TYPE __REPB_PREFIX(translate); + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see whether or + not we should use the fastmap, so we don't set this absolutely + perfectly; see `re_compile_fastmap' (the `duplicate' case). */ + unsigned __REPB_PREFIX(can_be_null) : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. 
+ If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#ifdef __USE_GNU +# define REGS_UNALLOCATED 0 +# define REGS_REALLOCATE 1 +# define REGS_FIXED 2 +#endif + unsigned __REPB_PREFIX(regs_allocated) : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned __REPB_PREFIX(fastmap_accurate) : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned __REPB_PREFIX(no_sub) : 1; + + /* If set, a beginning-of-line anchor doesn't match at the beginning + of the string. */ + unsigned __REPB_PREFIX(not_bol) : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned __REPB_PREFIX(not_eol) : 1; + + /* If true, an anchor at a newline matches. */ + unsigned __REPB_PREFIX(newline_anchor) : 1; +}; + +typedef struct re_pattern_buffer regex_t; + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +#ifdef __USE_GNU +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +# ifndef RE_NREGS +# define RE_NREGS 30 +# endif +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +#ifdef __USE_GNU +/* Sets the current default syntax to SYNTAX, and return the old syntax. 
+ You can also simply assign to the `re_syntax_options' variable. */ +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, int __range, + struct re_registers *__regs); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + int __range, struct re_registers *__regs, int __stop); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, struct re_registers *__regs); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. 
*/ +extern int re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + struct re_registers *__regs, int __stop); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); +#endif /* Use GNU */ + +#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD) +# ifndef _CRAY +/* 4.2 bsd compatibility. */ +extern char *re_comp (const char *); +extern int re_exec (const char *); +# endif +#endif + +/* GCC 2.95 and later have "__restrict"; C99 compilers have + "restrict", and "configure" may have defined "restrict". */ +#ifndef __restrict +# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) +# if defined restrict || 199901L <= __STDC_VERSION__ +# define __restrict restrict +# else +# define __restrict +# endif +# endif +#endif +/* gcc 3.1 and up support the [restrict] syntax. */ +#ifndef __restrict_arr +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \ + && !defined __GNUG__ +# define __restrict_arr __restrict +# else +# define __restrict_arr +# endif +#endif + +/* POSIX compatibility. 
*/ +extern int regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); + +extern int regexec (const regex_t *__restrict __preg, + const char *__restrict __string, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); + +extern size_t regerror (int __errcode, const regex_t *__restrict __preg, + char *__restrict __errbuf, size_t __errbuf_size); + +extern void regfree (regex_t *__preg); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* regex.h */ diff --git a/gnu_regex/regex_internal.c b/gnu_regex/regex_internal.c new file mode 100644 index 0000000..c9da2b9 --- /dev/null +++ b/gnu_regex/regex_internal.c @@ -0,0 +1,1713 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ + +static void re_string_construct_common (const char *str, int len, + re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) internal_function; +static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int hash) internal_function; +static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int context, + unsigned int hash) internal_function; + +/* Functions for string operation. */ + +/* This function allocate the buffers. It is necessary to call + re_string_reconstruct before using the object. */ + +static reg_errcode_t +internal_function +re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + int init_buf_len; + + /* Ensure at least one character fits into the buffers. */ + if (init_len < dfa->mb_cur_max) + init_len = dfa->mb_cur_max; + init_buf_len = (len + 1 < init_len) ? len + 1: init_len; + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + ret = re_string_realloc_buffers (pstr, init_buf_len); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + pstr->word_char = dfa->word_char; + pstr->word_ops_used = dfa->word_ops_used; + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; + pstr->valid_raw_len = pstr->valid_len; + return REG_NOERROR; +} + +/* This function allocate the buffers, and initialize them. 
*/ + +static reg_errcode_t +internal_function +re_string_construct (re_string_t *pstr, const char *str, int len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + memset (pstr, '\0', sizeof (re_string_t)); + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + if (len > 0) + { + ret = re_string_realloc_buffers (pstr, len + 1); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + + if (icase) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + if (pstr->valid_raw_len >= len) + break; + if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) + break; + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (trans != NULL) + re_string_translate_buffer (pstr); + else + { + pstr->valid_len = pstr->bufs_len; + pstr->valid_raw_len = pstr->bufs_len; + } + } + } + + return REG_NOERROR; +} + +/* Helper functions for re_string_allocate, and re_string_construct. 
*/ + +static reg_errcode_t +internal_function +re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) +{ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); + if (BE (new_wcs == NULL, 0)) + return REG_ESPACE; + pstr->wcs = new_wcs; + if (pstr->offsets != NULL) + { + int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len); + if (BE (new_offsets == NULL, 0)) + return REG_ESPACE; + pstr->offsets = new_offsets; + } + } +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + { + unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, + new_buf_len); + if (BE (new_mbs == NULL, 0)) + return REG_ESPACE; + pstr->mbs = new_mbs; + } + pstr->bufs_len = new_buf_len; + return REG_NOERROR; +} + + +static void +internal_function +re_string_construct_common (const char *str, int len, re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) +{ + pstr->raw_mbs = (const unsigned char *) str; + pstr->len = len; + pstr->raw_len = len; + pstr->trans = trans; + pstr->icase = icase ? 1 : 0; + pstr->mbs_allocated = (trans != NULL || icase); + pstr->mb_cur_max = dfa->mb_cur_max; + pstr->is_utf8 = dfa->is_utf8; + pstr->map_notascii = dfa->map_notascii; + pstr->stop = pstr->len; + pstr->raw_stop = pstr->stop; +} + +#ifdef RE_ENABLE_I18N + +/* Build wide character buffer PSTR->WCS. + If the byte sequence of the string are: + (0), (1), (0), (1), + Then wide character buffer will be: + , WEOF , , WEOF , + We use WEOF for padding, they indicate that the position isn't + a first byte of a multibyte character. + + Note that this function assumes PSTR->VALID_LEN elements are already + built and starts from PSTR->VALID_LEN. 
*/ + +static void +internal_function +build_wcs_buffer (re_string_t *pstr) +{ +#ifdef _LIBC + unsigned char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + unsigned char buf[64]; +#endif + mbstate_t prev_st; + int byte_idx, end_idx, remain_len; + size_t mbclen; + + /* Build the buffers from pstr->valid_len to either pstr->len or + pstr->bufs_len. */ + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + for (byte_idx = pstr->valid_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + /* Apply the translation if we need. */ + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i]; + buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; + mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2, 0)) + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a singlebyte character. */ + mbclen = 1; + wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + if (BE (pstr->trans != NULL, 0)) + wc = pstr->trans[wc]; + pstr->cur_state = prev_st; + } + + /* Write wide character and padding. */ + pstr->wcs[byte_idx++] = wc; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; +} + +/* Build wide character buffer PSTR->WCS like build_wcs_buffer, + but for REG_ICASE. 
*/ + +static reg_errcode_t +internal_function +build_wcs_upper_buffer (re_string_t *pstr) +{ /* Returns REG_NOERROR, or REG_ESPACE if PSTR->OFFSETS cannot be allocated. */ + mbstate_t prev_st; + int src_idx, byte_idx, end_idx, remain_len; + size_t mbclen; +#ifdef _LIBC + char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + char buf[64]; +#endif + + byte_idx = pstr->valid_len; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + /* The following optimization assumes that ASCII characters can be + mapped to wide characters with a simple cast. */ + if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed) + { + while (byte_idx < end_idx) + { + wchar_t wc; + + if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) + && mbsinit (&pstr->cur_state)) + { + /* In case of a singlebyte character. */ + pstr->mbs[byte_idx] + = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); + /* The next step uses the assumption that wchar_t is encoded + ASCII-safe: all ASCII values can be converted like this. */ + pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; + ++byte_idx; + continue; + } + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + mbclen = __mbrtowc (&wc, + ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + + byte_idx), remain_len, &pstr->cur_state); /* The test below holds unless MBCLEN is 0, (size_t) -1 or (size_t) -2. */ + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb (buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else + { /* The upper-case form has a different byte length: continue in the general loop below, which maintains PSTR->OFFSETS. */ + src_idx = byte_idx; + goto offsets_needed; + } + } + else + memcpy (pstr->mbs + byte_idx, + pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte.
*/ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + pstr->mbs[byte_idx] = ch; + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* MBCLEN is (size_t) -2: the character is incomplete within the bytes available, so restore the state and stop building. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; + return REG_NOERROR; + } + else + for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + offsets_needed: + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i]; + buf[i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; + mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else if (mbcdlen != (size_t) -1) + { + size_t i; + + if (byte_idx + mbcdlen > pstr->bufs_len) + { + pstr->cur_state = prev_st; + break; + } + + if (pstr->offsets == NULL) + { + pstr->offsets = re_malloc (int, pstr->bufs_len); + + if (pstr->offsets == NULL) + return REG_ESPACE; + } + if (!pstr->offsets_needed) + { + for (i = 0; i < (size_t) byte_idx; ++i) + pstr->offsets[i] = i; + pstr->offsets_needed = 1; + } + + memcpy (pstr->mbs + byte_idx, buf, mbcdlen); + pstr->wcs[byte_idx] = wcu; + pstr->offsets[byte_idx] = src_idx; + for (i = 1; i < mbcdlen; ++i) + { + pstr->offsets[byte_idx + i] + = src_idx + (i < mbclen ?
i : mbclen - 1); + pstr->wcs[byte_idx + i] = WEOF; + } + pstr->len += mbcdlen - mbclen; + if (pstr->raw_stop > src_idx) + pstr->stop += mbcdlen - mbclen; + end_idx = (pstr->bufs_len > pstr->len) + ? pstr->len : pstr->bufs_len; + byte_idx += mbcdlen; + src_idx += mbclen; + continue; + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + + if (BE (pstr->offsets_needed != 0, 0)) + { + size_t i; + for (i = 0; i < mbclen; ++i) + pstr->offsets[byte_idx + i] = src_idx + i; + } + src_idx += mbclen; + + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte. */ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; + + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans [ch]; + pstr->mbs[byte_idx] = ch; + + if (BE (pstr->offsets_needed != 0, 0)) + pstr->offsets[byte_idx] = src_idx; + ++src_idx; + + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* MBCLEN is (size_t) -2: the character is incomplete within the bytes available, so restore the state and stop building. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = src_idx; + return REG_NOERROR; +} + +/* Skip characters until the index becomes greater than or equal to + NEW_RAW_IDX. Return the index, and store the last character skipped + in *LAST_WC. */ + +static int +internal_function +re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) +{ + mbstate_t prev_st; + int rawbuf_idx; + size_t mbclen; + wchar_t wc = WEOF; + + /* Skip the characters which are not necessary to check.
*/ + for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len; + rawbuf_idx < new_raw_idx;) + { + int remain_len; + remain_len = pstr->len - rawbuf_idx; + prev_st = pstr->cur_state; + mbclen = __mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx, + remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a single byte character. */ + if (mbclen == 0 || remain_len == 0) + wc = L'\0'; + else + wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx); + mbclen = 1; + pstr->cur_state = prev_st; + } + /* Then proceed the next character. */ + rawbuf_idx += mbclen; + } + *last_wc = (wint_t) wc; + return rawbuf_idx; +} +#endif /* RE_ENABLE_I18N */ + +/* Build the buffer PSTR->MBS, and apply the translation if we need. + This function is used in case of REG_ICASE. */ + +static void +internal_function +build_upper_buffer (re_string_t *pstr) +{ + int char_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans[ch]; + if (islower (ch)) + pstr->mbs[char_idx] = toupper (ch); + else + pstr->mbs[char_idx] = ch; + } + pstr->valid_len = char_idx; + pstr->valid_raw_len = char_idx; +} + +/* Apply TRANS to the buffer in PSTR. */ + +static void +internal_function +re_string_translate_buffer (re_string_t *pstr) +{ + int buf_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; + pstr->mbs[buf_idx] = pstr->trans[ch]; + } + + pstr->valid_len = buf_idx; + pstr->valid_raw_len = buf_idx; +} + +/* This function re-construct the buffers. 
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1, + convert to upper case in case of REG_ICASE, apply translation. */ + +static reg_errcode_t +internal_function +re_string_reconstruct (re_string_t *pstr, int idx, int eflags) +{ + int offset = idx - pstr->raw_mbs_idx; + if (BE (offset < 0, 0)) + { + /* Reset buffer. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); +#endif /* RE_ENABLE_I18N */ + pstr->len = pstr->raw_len; + pstr->stop = pstr->raw_stop; + pstr->valid_len = 0; + pstr->raw_mbs_idx = 0; + pstr->valid_raw_len = 0; + pstr->offsets_needed = 0; + pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF); + if (!pstr->mbs_allocated) + pstr->mbs = (unsigned char *) pstr->raw_mbs; + offset = idx; + } + + if (BE (offset != 0, 1)) + { + /* Should the already checked characters be kept? */ + if (BE (offset < pstr->valid_raw_len, 1)) + { + /* Yes, move them to the front of the buffer. */ +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + int low = 0, high = pstr->valid_len, mid; + do + { + mid = (high + low) / 2; + if (pstr->offsets[mid] > offset) + high = mid; + else if (pstr->offsets[mid] < offset) + low = mid + 1; + else + break; + } + while (low < high); + if (pstr->offsets[mid] < offset) + ++mid; + pstr->tip_context = re_string_context_at (pstr, mid - 1, + eflags); + /* This can be quite complicated, so handle specially + only the common and easy case where the character with + different length representation of lower and upper + case is present at or after offset. 
*/ + if (pstr->valid_len > offset + && mid == offset && pstr->offsets[mid] == offset) + { + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); + memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; + for (low = 0; low < pstr->valid_len; low++) + pstr->offsets[low] = pstr->offsets[low + offset] - offset; + } + else + { + /* Otherwise, just find out how long the partial multibyte + character at offset is and fill it with WEOF/255. */ + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + while (mid > 0 && pstr->offsets[mid - 1] == offset) + --mid; + while (mid < pstr->valid_len) + if (pstr->wcs[mid] != WEOF) + break; + else + ++mid; + if (mid == pstr->valid_len) + pstr->valid_len = 0; + else + { + pstr->valid_len = pstr->offsets[mid] - offset; + if (pstr->valid_len) + { + for (low = 0; low < pstr->valid_len; ++low) + pstr->wcs[low] = WEOF; + memset (pstr->mbs, 255, pstr->valid_len); + } + } + pstr->valid_raw_len = pstr->valid_len; + } + } + else +#endif + { + pstr->tip_context = re_string_context_at (pstr, offset - 1, + eflags); +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + memmove (pstr->mbs, pstr->mbs + offset, + pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; +#if DEBUG + assert (pstr->valid_len > 0); +#endif + } + } + else + { + /* No, skip all characters until IDX. 
*/ + int prev_valid_len = pstr->valid_len; + +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + } +#endif + pstr->valid_len = 0; +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + int wcs_idx; + wint_t wc = WEOF; + + if (pstr->is_utf8) + { + const unsigned char *raw, *p, *q, *end; + + /* Special case UTF-8. Multi-byte chars start with any + byte other than 0x80 - 0xbf. */ + raw = pstr->raw_mbs + pstr->raw_mbs_idx; + end = raw + (offset - pstr->mb_cur_max); + if (end < pstr->raw_mbs) + end = pstr->raw_mbs; + p = raw + offset - 1; +#ifdef _LIBC + /* We know the wchar_t encoding is UCS4, so for the simple + case, ASCII characters, skip the conversion step. */ + if (isascii (*p) && BE (pstr->trans == NULL, 1)) + { + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + /* pstr->valid_len = 0; */ + wc = (wchar_t) *p; + } + else +#endif + for (; p >= end; --p) + if ((*p & 0xc0) != 0x80) + { + mbstate_t cur_state; + wchar_t wc2; + int mlen = raw + pstr->len - p; + unsigned char buf[6]; + size_t mbclen; + + q = p; + if (BE (pstr->trans != NULL, 0)) + { + int i = mlen < 6 ? mlen : 6; + while (--i >= 0) + buf[i] = pstr->trans[p[i]]; + q = buf; + } + /* XXX Don't use mbrtowc, we know which conversion + to use (UTF-8 -> UCS4). */ + memset (&cur_state, 0, sizeof (cur_state)); + mbclen = __mbrtowc (&wc2, (const char *) p, mlen, + &cur_state); + if (raw + offset - p <= mbclen + && mbclen < (size_t) -2) + { + memset (&pstr->cur_state, '\0', + sizeof (mbstate_t)); + pstr->valid_len = mbclen - (raw + offset - p); + wc = wc2; + } + break; + } + } + + if (wc == WEOF) + pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; + if (wc == WEOF) + pstr->tip_context + = re_string_context_at (pstr, prev_valid_len - 1, eflags); + else + pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + && IS_WIDE_WORD_CHAR (wc)) + ? 
CONTEXT_WORD + : ((IS_WIDE_NEWLINE (wc) + && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + if (BE (pstr->valid_len, 0)) + { + for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) + pstr->wcs[wcs_idx] = WEOF; + if (pstr->mbs_allocated) + memset (pstr->mbs, 255, pstr->valid_len); + } + pstr->valid_raw_len = pstr->valid_len; + } + else +#endif /* RE_ENABLE_I18N */ + { + int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; + pstr->valid_raw_len = 0; + if (pstr->trans) + c = pstr->trans[c]; + pstr->tip_context = (bitset_contain (pstr->word_char, c) + ? CONTEXT_WORD + : ((IS_NEWLINE (c) && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + } + } + if (!BE (pstr->mbs_allocated, 0)) + pstr->mbs += offset; + } + pstr->raw_mbs_idx = idx; + pstr->len -= offset; + pstr->stop -= offset; + + /* Then build the buffers. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + if (pstr->icase) + { + reg_errcode_t ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else + build_wcs_buffer (pstr); + } + else +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + { + if (pstr->icase) + build_upper_buffer (pstr); + else if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + else + pstr->valid_len = pstr->len; + + pstr->cur_idx = 0; + return REG_NOERROR; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_peek_byte_case (const re_string_t *pstr, int idx) +{ + int ch, off; + + /* Handle the common (easiest) cases first. */ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_peek_byte (pstr, idx); + +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1 + && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) + return re_string_peek_byte (pstr, idx); +#endif + + off = pstr->cur_idx + idx; +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + off = pstr->offsets[off]; +#endif + + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + +#ifdef RE_ENABLE_I18N + /* Ensure that e.g. 
for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I + this function returns CAPITAL LETTER I instead of first byte of + DOTLESS SMALL LETTER I. The latter would confuse the parser, + since peek_byte_case doesn't advance cur_idx in any way. */ + if (pstr->offsets_needed && !isascii (ch)) + return re_string_peek_byte (pstr, idx); +#endif + + return ch; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_fetch_byte_case (re_string_t *pstr) +{ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_fetch_byte (pstr); + +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + { + int off, ch; + + /* For tr_TR.UTF-8 [[:islower:]] there is + [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip + in that case the whole multi-byte character and return + the original letter. On the other side, with + [[: DOTLESS SMALL LETTER I return [[:I, as doing + anything else would complicate things too much. */ + + if (!re_string_first_byte (pstr, pstr->cur_idx)) + return re_string_fetch_byte (pstr); + + off = pstr->offsets[pstr->cur_idx]; + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + + if (! isascii (ch)) + return re_string_fetch_byte (pstr); + + re_string_skip_bytes (pstr, + re_string_char_size_at (pstr, pstr->cur_idx)); + return ch; + } +#endif + + return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; +} + +static void +internal_function +re_string_destruct (re_string_t *pstr) +{ +#ifdef RE_ENABLE_I18N + re_free (pstr->wcs); + re_free (pstr->offsets); +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + re_free (pstr->mbs); +} + +/* Return the context at IDX in INPUT. */ + +static unsigned int +internal_function +re_string_context_at (const re_string_t *input, int idx, int eflags) +{ + int c; + if (BE (idx < 0, 0)) + /* In this case, we use the value stored in input->tip_context, + since we can't know the character in input->mbs[-1] here. */ + return input->tip_context; + if (BE (idx == input->len, 0)) + return ((eflags & REG_NOTEOL) ? 
CONTEXT_ENDBUF + : CONTEXT_NEWLINE | CONTEXT_ENDBUF); +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc; + int wc_idx = idx; + while(input->wcs[wc_idx] == WEOF) + { +#ifdef DEBUG + /* It must not happen. */ + assert (wc_idx >= 0); +#endif + --wc_idx; + if (wc_idx < 0) + return input->tip_context; + } + wc = input->wcs[wc_idx]; + if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) + return CONTEXT_WORD; + return (IS_WIDE_NEWLINE (wc) && input->newline_anchor + ? CONTEXT_NEWLINE : 0); + } + else +#endif + { + c = re_string_byte_at (input, idx); + if (bitset_contain (input->word_char, c)) + return CONTEXT_WORD; + return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; + } +} + +/* Functions for set operation. */ + +static reg_errcode_t +internal_function +re_node_set_alloc (re_node_set *set, int size) +{ + set->alloc = size; + set->nelem = 0; + set->elems = re_malloc (int, size); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_1 (re_node_set *set, int elem) +{ + set->alloc = 1; + set->nelem = 1; + set->elems = re_malloc (int, 1); + if (BE (set->elems == NULL, 0)) + { + set->alloc = set->nelem = 0; + return REG_ESPACE; + } + set->elems[0] = elem; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_2 (re_node_set *set, int elem1, int elem2) +{ + set->alloc = 2; + set->elems = re_malloc (int, 2); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + if (elem1 == elem2) + { + set->nelem = 1; + set->elems[0] = elem1; + } + else + { + set->nelem = 2; + if (elem1 < elem2) + { + set->elems[0] = elem1; + set->elems[1] = elem2; + } + else + { + set->elems[0] = elem2; + set->elems[1] = elem1; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_copy (re_node_set *dest, const re_node_set *src) +{ + dest->nelem = src->nelem; + if (src->nelem > 0) + { + dest->alloc = 
dest->nelem; + dest->elems = re_malloc (int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + { + dest->alloc = dest->nelem = 0; + return REG_ESPACE; + } + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + } + else + re_node_set_init_empty (dest); + return REG_NOERROR; +} + +/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. + Note: We assume dest->elems is NULL, when dest->alloc is 0. */ + +static reg_errcode_t +internal_function +re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) +{ + int i1, i2, is, id, delta, sbase; + if (src1->nelem == 0 || src2->nelem == 0) + return REG_NOERROR; + + /* We need dest->nelem + 2 * elems_in_intersection; this is a + conservative estimate. */ + if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) + { + int new_alloc = src1->nelem + src2->nelem + dest->alloc; + int *new_elems = re_realloc (dest->elems, int, new_alloc); + if (BE (new_elems == NULL, 0)) + return REG_ESPACE; + dest->elems = new_elems; + dest->alloc = new_alloc; + } + + /* Find the items in the intersection of SRC1 and SRC2, and copy + into the top of DEST those that are not already in DEST itself. */ + sbase = dest->nelem + src1->nelem + src2->nelem; + i1 = src1->nelem - 1; + i2 = src2->nelem - 1; + id = dest->nelem - 1; + for (;;) + { + if (src1->elems[i1] == src2->elems[i2]) + { + /* Try to find the item in DEST. Maybe we could binary search? */ + while (id >= 0 && dest->elems[id] > src1->elems[i1]) + --id; + + if (id < 0 || dest->elems[id] != src1->elems[i1]) + dest->elems[--sbase] = src1->elems[i1]; + + if (--i1 < 0 || --i2 < 0) + break; + } + + /* Lower the highest of the two items. 
*/ + else if (src1->elems[i1] < src2->elems[i2]) + { + if (--i2 < 0) + break; + } + else + { + if (--i1 < 0) + break; + } + } + + id = dest->nelem - 1; + is = dest->nelem + src1->nelem + src2->nelem - 1; + delta = is - sbase + 1; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place; this is more or + less the same loop that is in re_node_set_merge. */ + dest->nelem += delta; + if (delta > 0 && id >= 0) + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + break; + } + } + + /* Copy remaining SRC elements. */ + memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int)); + + return REG_NOERROR; +} + +/* Calculate the union set of the sets SRC1 and SRC2. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/
+
+/* DEST is treated as uninitialized: its alloc, nelem and elems fields are
+   overwritten and nothing it may have owned is freed.  SRC1 and SRC2 may
+   each be NULL or empty; when at most one of them has elements the result
+   is a plain copy of that one (or the empty set).  The general case is
+   written as a two-way merge of ascending sequences, so a value present
+   in both inputs is stored only once.  Returns REG_ESPACE when the
+   element array cannot be allocated.  */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+                        const re_node_set *src2)
+{
+  const int have1 = (src1 != NULL && src1->nelem > 0);
+  const int have2 = (src2 != NULL && src2->nelem > 0);
+  int pos1 = 0, pos2 = 0, out = 0;
+
+  /* Degenerate cases: nothing to merge, just copy (or clear).  */
+  if (!have1 || !have2)
+    {
+      if (have1)
+        return re_node_set_init_copy (dest, src1);
+      if (have2)
+        return re_node_set_init_copy (dest, src2);
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  /* The union never holds more than both sizes added together.  */
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  while (pos1 < src1->nelem && pos2 < src2->nelem)
+    {
+      const int lhs = src1->elems[pos1];
+      const int rhs = src2->elems[pos2];
+
+      if (lhs > rhs)
+        {
+          dest->elems[out++] = rhs;
+          ++pos2;
+        }
+      else
+        {
+          /* Equal values: consume both sides, store a single copy.  */
+          if (lhs == rhs)
+            ++pos2;
+          dest->elems[out++] = lhs;
+          ++pos1;
+        }
+    }
+
+  /* At most one input still has a tail; append it unchanged.  */
+  if (pos1 < src1->nelem)
+    {
+      const int rest = src1->nelem - pos1;
+      memcpy (dest->elems + out, src1->elems + pos1, rest * sizeof (int));
+      out += rest;
+    }
+  else if (pos2 < src2->nelem)
+    {
+      const int rest = src2->nelem - pos2;
+      memcpy (dest->elems + out, src2->elems + pos2, rest * sizeof (int));
+      out += rest;
+    }
+
+  dest->nelem = out;
+  return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets DEST and SRC.  And store it to
+   DEST.  Return value indicate the error code or REG_NOERROR if succeeded.
*/
+
+/* SRC may be NULL or empty, in which case DEST is left untouched.  DEST is
+   grown so that it can hold its current elements plus twice SRC's, because
+   the upper part of the array is used as scratch space for the SRC items
+   that are missing from DEST before they are merged into place.  Returns
+   REG_ESPACE if that reallocation fails (DEST is then unchanged).  */
+
+static reg_errcode_t
+internal_function
+re_node_set_merge (re_node_set *dest, const re_node_set *src)
+{
+  int is, id, sbase, delta;
+  if (src == NULL || src->nelem == 0)
+    return REG_NOERROR;
+  if (dest->alloc < 2 * src->nelem + dest->nelem)
+    {
+      int new_alloc = 2 * (src->nelem + dest->alloc);
+      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+      if (BE (new_buffer == NULL, 0))
+        return REG_ESPACE;
+      dest->elems = new_buffer;
+      dest->alloc = new_alloc;
+    }
+
+  if (BE (dest->nelem == 0, 0))
+    {
+      dest->nelem = src->nelem;
+      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+      return REG_NOERROR;
+    }
+
+  /* Copy into the top of DEST the items of SRC that are not
+     found in DEST.  Maybe we could binary search in DEST?  */
+  for (sbase = dest->nelem + 2 * src->nelem,
+       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+    {
+      if (dest->elems[id] == src->elems[is])
+        is--, id--;
+      else if (dest->elems[id] < src->elems[is])
+        dest->elems[--sbase] = src->elems[is--];
+      else /* if (dest->elems[id] > src->elems[is]) */
+        --id;
+    }
+
+  if (is >= 0)
+    {
+      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
+      sbase -= is + 1;
+      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
+    }
+
+  /* DELTA is the number of new items now parked at elems[sbase..is].  */
+  id = dest->nelem - 1;
+  is = dest->nelem + 2 * src->nelem - 1;
+  delta = is - sbase + 1;
+  if (delta == 0)
+    return REG_NOERROR;
+
+  /* Now copy.  When DELTA becomes zero, the remaining
+     DEST elements are already in place.  */
+  dest->nelem += delta;
+  for (;;)
+    {
+      if (dest->elems[is] > dest->elems[id])
+        {
+          /* Copy from the top.  */
+          dest->elems[id + delta--] = dest->elems[is--];
+          if (delta == 0)
+            break;
+        }
+      else
+        {
+          /* Slide from the bottom.  */
+          dest->elems[id + delta] = dest->elems[id];
+          if (--id < 0)
+            {
+              /* Copy remaining SRC elements.  */
+              memcpy (dest->elems, dest->elems + sbase,
+                      delta * sizeof (int));
+              break;
+            }
+        }
+    }
+
+  return REG_NOERROR;
+}
+
+/* Insert the new element ELEM into the re_node_set SET, keeping the
+   elements in ascending order.  SET should not already have ELEM.
+   Return -1 if an error occurred, return 1 otherwise.  On failure SET
+   still describes its previous, valid contents.  */
+
+static int
+internal_function
+re_node_set_insert (re_node_set *set, int elem)
+{
+  int idx;
+  /* In case the set has no storage yet.  */
+  if (set->alloc == 0)
+    {
+      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
+        return 1;
+      else
+        return -1;
+    }
+
+  if (BE (set->nelem == 0, 0))
+    {
+      /* We already guaranteed above that set->alloc != 0.  */
+      set->elems[0] = elem;
+      ++set->nelem;
+      return 1;
+    }
+
+  /* Realloc if we need.  The recorded capacity is only updated once the
+     reallocation has succeeded, so a failure cannot leave SET claiming
+     more room than it really has.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = set->alloc * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Move the elements which follow the new element.  Test the
+     first element separately to skip a check in the inner loop.  */
+  if (elem < set->elems[0])
+    {
+      for (idx = set->nelem; idx > 0; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+  else
+    {
+      /* elems[0] <= ELEM acts as a sentinel, so IDX never reaches 0.  */
+      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+
+  /* Insert the new element.  */
+  set->elems[idx] = elem;
+  ++set->nelem;
+  return 1;
+}
+
+/* Append the new element ELEM to the re_node_set SET.
+   SET should not already have any element greater than or equal to ELEM.
+   Return -1 if an error occurred, return 1 otherwise.  On failure SET
+   still describes its previous, valid contents.  */
+
+static int
+internal_function
+re_node_set_insert_last (re_node_set *set, int elem)
+{
+  /* Realloc if we need; commit the new capacity only on success.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = (set->alloc + 1) * 2;
+      int *new_elems = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_elems == NULL, 0))
+        return -1;
+      set->elems = new_elems;
+      set->alloc = new_alloc;
+    }
+
+  /* Insert the new element.  */
+  set->elems[set->nelem++] = elem;
+  return 1;
+}
+
+/* Compare two node sets SET1 and SET2 element by element.
+   Return 1 if SET1 and SET2 are equivalent, return 0 otherwise
+   (including when either pointer is NULL).  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
+{
+  int i;
+  if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
+    return 0;
+  for (i = set1->nelem ; --i >= 0 ; )
+    if (set1->elems[i] != set2->elems[i])
+      return 0;
+  return 1;
+}
+
+/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.
+   The lookup is a binary search, so it is only meaningful when the
+   elements are stored in ascending order.  */
+
+static int
+internal_function __attribute ((pure))
+re_node_set_contains (const re_node_set *set, int elem)
+{
+  unsigned int idx, right, mid;
+  if (set->nelem <= 0)
+    return 0;
+
+  /* Binary search the element.  */
+  idx = 0;
+  right = set->nelem - 1;
+  while (idx < right)
+    {
+      mid = (idx + right) / 2;
+      if (set->elems[mid] < elem)
+        idx = mid + 1;
+      else
+        right = mid;
+    }
+  return set->elems[idx] == elem ? idx + 1 : 0;
+}
+
+/* Remove the element at position IDX from SET, shifting the following
+   elements down by one.  An out-of-range IDX is ignored.  */
+
+static void
+internal_function
+re_node_set_remove_at (re_node_set *set, int idx)
+{
+  if (idx < 0 || idx >= set->nelem)
+    return;
+  --set->nelem;
+  for (; idx < set->nelem; idx++)
+    set->elems[idx] = set->elems[idx + 1];
+}
+
+
+/* Add the token TOKEN to dfa->nodes, and return the index of the token.
+   Or return -1 if an error occurred.
+
+   When the node arrays are full, all five parallel arrays (nodes, nexts,
+   org_indices, edests, eclosures) are doubled.  Each reallocation result
+   is stored back into DFA as soon as it succeeds: realloc may move the
+   buffer, so keeping the old pointer after a later reallocation fails
+   would leave DFA pointing at freed memory.  dfa->nodes_alloc is only
+   raised once every array has been grown, so on failure DFA remains
+   consistent (some arrays are merely larger than recorded).  */
+
+static int
+internal_function
+re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
+{
+  int type = token.type;
+  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
+    {
+      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
+      int *new_nexts, *new_indices;
+      re_node_set *new_edests, *new_eclosures;
+      re_token_t *new_nodes;
+
+      /* Avoid overflows, both of the element count itself and of the
+         byte sizes computed from it by re_realloc.  */
+      if (BE (new_nodes_alloc < dfa->nodes_alloc
+              || new_nodes_alloc > SIZE_MAX / sizeof (re_token_t)
+              || new_nodes_alloc > SIZE_MAX / sizeof (re_node_set), 0))
+        return -1;
+
+      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
+      if (BE (new_nodes == NULL, 0))
+        return -1;
+      dfa->nodes = new_nodes;
+
+      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+      if (BE (new_nexts == NULL, 0))
+        return -1;
+      dfa->nexts = new_nexts;
+
+      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+      if (BE (new_indices == NULL, 0))
+        return -1;
+      dfa->org_indices = new_indices;
+
+      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+      if (BE (new_edests == NULL, 0))
+        return -1;
+      dfa->edests = new_edests;
+
+      new_eclosures = re_realloc (dfa->eclosures, re_node_set,
+                                  new_nodes_alloc);
+      if (BE (new_eclosures == NULL, 0))
+        return -1;
+      dfa->eclosures = new_eclosures;
+
+      dfa->nodes_alloc = new_nodes_alloc;
+    }
+  dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].constraint = 0;
+#ifdef RE_ENABLE_I18N
+  dfa->nodes[dfa->nodes_len].accept_mb =
+    (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
+#endif
+  dfa->nexts[dfa->nodes_len] = -1;
+  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
+  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
+  return dfa->nodes_len++;
+}
+
+/* Hash a node set together with a context value: the element count,
+   CONTEXT and every element are simply added up.  */
+
+static inline unsigned int
+internal_function
+calc_state_hash (const re_node_set *nodes, unsigned int context)
+{
+  unsigned int hash = nodes->nelem + context;
+  int i;
+  for (i = 0 ; i < nodes->nelem ; i++)
+    hash += nodes->elems[i];
+  return hash;
+}
+
+/* Search for the state whose node_set is equivalent to NODES.
+   Return the pointer to the state, if we found it in the DFA.
+   Otherwise create the new one and return it.  In case of an error
+   return NULL and set the error code in ERR.
+   Note: - We assume NULL as the invalid state, then it is possible that
+	   return value is NULL and ERR is REG_NOERROR.
+	 - We never return non-NULL value in case of any errors, it is for
+	   optimization.
*/
+
+/* This is the context-independent lookup: the hash is computed with a
+   context of 0 and candidates are compared on their NODES member.
+   ERR is written only for the empty set (REG_NOERROR) and when creating
+   a new state fails (REG_ESPACE); it is left untouched otherwise.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+                  const re_node_set *nodes)
+{
+  const struct re_state_table_entry *bucket;
+  re_dfastate_t *candidate, *created;
+  unsigned int key;
+  int slot;
+
+  /* The empty node set is represented by the NULL state.  */
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, 0);
+  bucket = &dfa->state_table[key & dfa->state_hash_mask];
+
+  /* Scan the bucket; the cheap hash comparison filters most entries
+     before the element-wise set comparison runs.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      candidate = bucket->array[slot];
+      if (candidate->hash == key
+          && re_node_set_compare (&candidate->nodes, nodes))
+        return candidate;
+    }
+
+  /* No matching state exists in the DFA yet, so create a new one.  */
+  created = create_ci_newstate (dfa, nodes, key);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Search for the state whose node_set is equivalent to NODES and
+   whose context is equivalent to CONTEXT.
+   Return the pointer to the state, if we found it in the DFA.
+   Otherwise create the new one and return it.  In case of an error
+   return NULL and set the error code in ERR.
+   Note: - We assume NULL as the invalid state, then it is possible that
+	   return value is NULL and ERR is REG_NOERROR.
+	 - We never return non-NULL value in case of any errors, it is for
+	   optimization.
*/
+
+/* Candidates in the hash bucket must agree on hash, on CONTEXT and on
+   their ENTRANCE_NODES set (not on NODES).  ERR is written only for the
+   empty set (REG_NOERROR) and when creating a new state fails
+   (REG_ESPACE); it is left untouched otherwise.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+                          const re_node_set *nodes, unsigned int context)
+{
+  const struct re_state_table_entry *bucket;
+  re_dfastate_t *candidate, *created;
+  unsigned int key;
+  int slot;
+
+  /* The empty node set is represented by the NULL state.  */
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  key = calc_state_hash (nodes, context);
+  bucket = &dfa->state_table[key & dfa->state_hash_mask];
+
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      candidate = bucket->array[slot];
+      if (candidate->hash != key)
+        continue;
+      if (candidate->context != context)
+        continue;
+      if (re_node_set_compare (candidate->entrance_nodes, nodes))
+        return candidate;
+    }
+
+  /* No matching state exists in `dfa' yet, so create a new one.  */
+  created = create_cd_newstate (dfa, nodes, context, key);
+  if (BE (created == NULL, 0))
+    *err = REG_ESPACE;
+  return created;
+}
+
+/* Finish initialization of the new state NEWSTATE, and using its hash value
+   HASH put in the appropriate bucket of DFA's state table.  Return value
+   indicates the error code if failed.
*/
+
+/* Two things happen here: NEWSTATE->non_eps_nodes is filled with the
+   members of NEWSTATE->nodes that are not epsilon nodes, and NEWSTATE is
+   appended to the hash bucket selected by HASH, growing the bucket array
+   when it is full.  On REG_ESPACE the state has not been added to the
+   table; NEWSTATE itself is not released here.  */
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+                unsigned int hash)
+{
+  struct re_state_table_entry *spot;
+  reg_errcode_t err;
+  int i;
+
+  newstate->hash = hash;
+  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
+  if (BE (err != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (i = 0; i < newstate->nodes.nelem; i++)
+    {
+      int elem = newstate->nodes.elems[i];
+      if (IS_EPSILON_NODE (dfa->nodes[elem].type))
+        continue;
+      /* The set was sized for every node above, so no reallocation is
+         expected; the result is still checked so that a failure can
+         never be silently turned into a truncated set.  */
+      if (BE (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0,
+              0))
+        return REG_ESPACE;
+    }
+
+  spot = dfa->state_table + (hash & dfa->state_hash_mask);
+  if (BE (spot->alloc <= spot->num, 0))
+    {
+      int new_alloc = 2 * spot->num + 2;
+      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+                                              new_alloc);
+      if (BE (new_array == NULL, 0))
+        return REG_ESPACE;
+      spot->array = new_array;
+      spot->alloc = new_alloc;
+    }
+  spot->array[spot->num++] = newstate;
+  return REG_NOERROR;
+}
+
+/* Release STATE and everything it owns.  ENTRANCE_NODES is freed
+   separately only when it does not alias STATE->nodes; it must not be
+   NULL, because it is dereferenced unconditionally in that case.  */
+
+static void
+free_state (re_dfastate_t *state)
+{
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+  if (state->entrance_nodes != &state->nodes)
+    {
+      re_node_set_free (state->entrance_nodes);
+      re_free (state->entrance_nodes);
+    }
+  re_node_set_free (&state->nodes);
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_free (state);
+}
+
+/* Create the new state which is independent of contexts.
+   Return the new state if succeeded, otherwise return NULL.
*/
+
+/* The state starts zero-filled, receives a private copy of NODES, and has
+   its ENTRANCE_NODES alias that copy.  Its flag bits (accept_mb, halt,
+   has_backref, has_constraint) are derived from the DFA nodes listed in
+   NODES; plain CHARACTER nodes without a constraint contribute nothing.
+   Finally the state is entered into the DFA's state table under HASH.  */
+
+static re_dfastate_t *
+internal_function
+create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int hash)
+{
+  re_dfastate_t *state;
+  int n;
+
+  state = (re_dfastate_t *) calloc (1, sizeof (re_dfastate_t));
+  if (BE (state == NULL, 0))
+    return NULL;
+
+  if (BE (re_node_set_init_copy (&state->nodes, nodes) != REG_NOERROR, 0))
+    {
+      /* Nothing else is owned yet, so a plain free is enough.  */
+      re_free (state);
+      return NULL;
+    }
+  state->entrance_nodes = &state->nodes;
+
+  for (n = 0; n < nodes->nelem; ++n)
+    {
+      const re_token_t *tok = &dfa->nodes[nodes->elems[n]];
+      const re_token_type_t kind = tok->type;
+
+      if (kind == CHARACTER && !tok->constraint)
+        continue;
+#ifdef RE_ENABLE_I18N
+      state->accept_mb |= tok->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      switch (kind)
+        {
+        case END_OF_RE:
+          /* A state containing the halt node is a halt state.  */
+          state->halt = 1;
+          break;
+        case OP_BACK_REF:
+          state->has_backref = 1;
+          break;
+        default:
+          if (kind == ANCHOR || tok->constraint)
+            state->has_constraint = 1;
+          break;
+        }
+    }
+
+  if (BE (register_state (dfa, state, hash) != REG_NOERROR, 0))
+    {
+      free_state (state);
+      return NULL;
+    }
+  return state;
+}
+
+/* Create the new state which depends on the context CONTEXT.
+   Return the new state if succeeded, otherwise return NULL.
*/
+
+/* The state starts zero-filled with a private copy of NODES.  As soon as
+   a node carrying a constraint is seen, a second, separately allocated
+   copy of NODES is kept as ENTRANCE_NODES, and every node whose
+   constraint is not satisfied by CONTEXT is dropped from the state's own
+   node set.  NCTX_NODES counts the nodes removed so far, which is how far
+   the remaining elements have shifted down relative to NODES.
+
+   Every allocation failure releases the partially built state and
+   returns NULL.  The new ENTRANCE_NODES pointer is stored in the state
+   only once it is known to be non-NULL, because free_state dereferences
+   that member whenever it does not alias the state's own node set.  */
+
+static re_dfastate_t *
+internal_function
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+                    unsigned int context, unsigned int hash)
+{
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = (re_dfastate_t *) calloc (1, sizeof (re_dfastate_t));
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      if (type == CHARACTER && !constraint)
+        continue;
+#ifdef RE_ENABLE_I18N
+      newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+      /* If the state has the halt node, the state is a halt state.  */
+      if (type == END_OF_RE)
+        newstate->halt = 1;
+      else if (type == OP_BACK_REF)
+        newstate->has_backref = 1;
+
+      if (constraint)
+        {
+          if (newstate->entrance_nodes == &newstate->nodes)
+            {
+              re_node_set *entrance = re_malloc (re_node_set, 1);
+              if (BE (entrance == NULL, 0))
+                {
+                  /* entrance_nodes still aliases newstate->nodes here,
+                     so free_state does not touch a NULL pointer.  */
+                  free_state (newstate);
+                  return NULL;
+                }
+              newstate->entrance_nodes = entrance;
+              err = re_node_set_init_copy (entrance, nodes);
+              if (BE (err != REG_NOERROR, 0))
+                {
+                  /* A failed copy leaves ENTRANCE empty with a NULL
+                     element array, which free_state releases safely.  */
+                  free_state (newstate);
+                  return NULL;
+                }
+              nctx_nodes = 0;
+              newstate->has_constraint = 1;
+            }
+
+          if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+            {
+              re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+              ++nctx_nodes;
+            }
+        }
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return newstate;
+}
diff --git a/gnu_regex/regex_internal.h b/gnu_regex/regex_internal.h
new file mode 100644
index 0000000..71c4a38
--- /dev/null
+++ b/gnu_regex/regex_internal.h
@@ -0,0 +1,773 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002-2005, 2007, 2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_INTERNAL_H +#define _REGEX_INTERNAL_H 1 + +#include +#include +#include +#include +#include + +#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC +# include +#endif +#if defined HAVE_LOCALE_H || defined _LIBC +# include +#endif +#if defined HAVE_WCHAR_H || defined _LIBC +# include +#endif /* HAVE_WCHAR_H || _LIBC */ +#if defined HAVE_WCTYPE_H || defined _LIBC +# include +#endif /* HAVE_WCTYPE_H || _LIBC */ +#if defined HAVE_STDBOOL_H || defined _LIBC +# include +#endif /* HAVE_STDBOOL_H || _LIBC */ +#if defined HAVE_STDINT_H || defined _LIBC +# include +#endif /* HAVE_STDINT_H || _LIBC */ +#if defined _LIBC +# include +#else +# define __libc_lock_define(CLASS,NAME) +# define __libc_lock_init(NAME) do { } while (0) +# define __libc_lock_lock(NAME) do { } while (0) +# define __libc_lock_unlock(NAME) do { } while (0) +#endif + +/* In case that the system doesn't have isblank(). 
*/ +#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +# define isblank(ch) ((ch) == ' ' || (ch) == '\t') +#endif + +#ifdef _LIBC +# ifndef _RE_DEFINE_LOCALE_FUNCTIONS +# define _RE_DEFINE_LOCALE_FUNCTIONS 1 +# include +# include +# include +# endif +#endif + +/* This is for other GNU distributions with internationalized messages. */ +#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include +# ifdef _LIBC +# undef gettext +# define gettext(msgid) \ + INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) +# endif +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +# define gettext_noop(String) String +#endif + +/* For loser systems without the definition. */ +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC +# define RE_ENABLE_I18N +#endif + +#if __GNUC__ >= 3 +# define BE(expr, val) __builtin_expect (expr, val) +#else +# define BE(expr, val) (expr) +# define inline +#endif + +/* Number of single byte character. */ +#define SBC_MAX 256 + +#define COLL_ELEM_LEN_MAX 8 + +/* The character which represents newline. */ +#define NEWLINE_CHAR '\n' +#define WIDE_NEWLINE_CHAR L'\n' + +/* Rename to standard API for using out of glibc. */ +#ifndef _LIBC +# define __wctype wctype +# define __iswctype iswctype +# define __btowc btowc +# define __mbrtowc mbrtowc +# define __mempcpy mempcpy +# define __wcrtomb wcrtomb +# define __regfree regfree +# define attribute_hidden +#endif /* not _LIBC */ + +#ifdef __GNUC__ +# define __attribute(arg) __attribute__ (arg) +#else +# define __attribute(arg) +#endif + +extern const char __re_error_msgid[] attribute_hidden; +extern const size_t __re_error_msgid_idx[] attribute_hidden; + +/* An integer used to represent a set of bits. 
It must be unsigned, + and must be at least as wide as unsigned int. */ +typedef unsigned long int bitset_word_t; +/* All bits set in a bitset_word_t. */ +#define BITSET_WORD_MAX ULONG_MAX +/* Number of bits in a bitset_word_t. */ +#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) +/* Number of bitset_word_t in a bit_set. */ +#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) +typedef bitset_word_t bitset_t[BITSET_WORDS]; +typedef bitset_word_t *re_bitset_ptr_t; +typedef const bitset_word_t *re_const_bitset_ptr_t; + +#define bitset_set(set,i) \ + (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS) +#define bitset_clear(set,i) \ + (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_contain(set,i) \ + (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t)) +#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t)) +#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t)) + +#define PREV_WORD_CONSTRAINT 0x0001 +#define PREV_NOTWORD_CONSTRAINT 0x0002 +#define NEXT_WORD_CONSTRAINT 0x0004 +#define NEXT_NOTWORD_CONSTRAINT 0x0008 +#define PREV_NEWLINE_CONSTRAINT 0x0010 +#define NEXT_NEWLINE_CONSTRAINT 0x0020 +#define PREV_BEGBUF_CONSTRAINT 0x0040 +#define NEXT_ENDBUF_CONSTRAINT 0x0080 +#define WORD_DELIM_CONSTRAINT 0x0100 +#define NOT_WORD_DELIM_CONSTRAINT 0x0200 + +typedef enum +{ + INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + LINE_FIRST = PREV_NEWLINE_CONSTRAINT, + LINE_LAST = NEXT_NEWLINE_CONSTRAINT, + BUF_FIRST = PREV_BEGBUF_CONSTRAINT, + BUF_LAST = NEXT_ENDBUF_CONSTRAINT, + WORD_DELIM = WORD_DELIM_CONSTRAINT, + NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT +} re_context_type; + +typedef 
struct +{ + int alloc; /* presumably the allocated capacity of ELEMS -- confirm */ + int nelem; /* element count; re_node_set_empty resets it to 0 */ + int *elems; /* released by re_node_set_free */ +} re_node_set; + +typedef enum +{ + NON_TYPE = 0, + + /* Node types; these are used by token, node, and tree. */ + CHARACTER = 1, + END_OF_RE = 2, + SIMPLE_BRACKET = 3, + OP_BACK_REF = 4, + OP_PERIOD = 5, +#ifdef RE_ENABLE_I18N + COMPLEX_BRACKET = 6, + OP_UTF8_PERIOD = 7, +#endif /* RE_ENABLE_I18N */ + + /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used + when the debugger shows values of this enum type. */ +#define EPSILON_BIT 8 + OP_OPEN_SUBEXP = EPSILON_BIT | 0, + OP_CLOSE_SUBEXP = EPSILON_BIT | 1, + OP_ALT = EPSILON_BIT | 2, + OP_DUP_ASTERISK = EPSILON_BIT | 3, + ANCHOR = EPSILON_BIT | 4, + + /* Tree types; these are used only by tree. */ + CONCAT = 16, + SUBEXP = 17, + + /* Token types; these are used only by token. */ + OP_DUP_PLUS = 18, /* the following enumerators take the implicit values 19 .. 36 */ + OP_DUP_QUESTION, + OP_OPEN_BRACKET, + OP_CLOSE_BRACKET, + OP_CHARSET_RANGE, + OP_OPEN_DUP_NUM, + OP_CLOSE_DUP_NUM, + OP_NON_MATCH_LIST, + OP_OPEN_COLL_ELEM, + OP_CLOSE_COLL_ELEM, + OP_OPEN_EQUIV_CLASS, + OP_CLOSE_EQUIV_CLASS, + OP_OPEN_CHAR_CLASS, + OP_CLOSE_CHAR_CLASS, + OP_WORD, + OP_NOTWORD, + OP_SPACE, + OP_NOTSPACE, + BACK_SLASH + +} re_token_type_t; + +#ifdef RE_ENABLE_I18N +typedef struct +{ + /* Multibyte characters. */ + wchar_t *mbchars; + + /* Collating symbols. */ +# ifdef _LIBC + int32_t *coll_syms; +# endif + + /* Equivalence classes. */ +# ifdef _LIBC + int32_t *equiv_classes; +# endif + + /* Range expressions. */ +# ifdef _LIBC + uint32_t *range_starts; + uint32_t *range_ends; +# else /* not _LIBC */ + wchar_t *range_starts; + wchar_t *range_ends; +# endif /* not _LIBC */ + + /* Character classes. */ + wctype_t *char_classes; + + /* If this character set is the non-matching list. */ + unsigned int non_match : 1; + + /* # of multibyte characters. */ + int nmbchars; + + /* # of collating symbols. */ + int ncoll_syms; + + /* # of equivalence classes. */ + int nequiv_classes; + + /* # of range expressions. 
*/ + int nranges; + + /* # of character classes. */ + int nchar_classes; +} re_charset_t; +#endif /* RE_ENABLE_I18N */ + +typedef struct +{ + union + { + unsigned char c; /* for CHARACTER */ + re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; /* for COMPLEX_BRACKET */ +#endif /* RE_ENABLE_I18N */ + int idx; /* for OP_BACK_REF */ + re_context_type ctx_type; /* for ANCHOR */ + } opr; +#if __GNUC__ >= 2 + re_token_type_t type : 8; +#else + re_token_type_t type; +#endif + unsigned int constraint : 10; /* context constraint: *_CONSTRAINT flags, the highest being 0x0200 (bit 9) */ + unsigned int duplicated : 1; + unsigned int opt_subexp : 1; +#ifdef RE_ENABLE_I18N + unsigned int accept_mb : 1; + /* These 2 bits can be moved into the union if needed (e.g. if running out + of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ + unsigned int mb_partial : 1; +#endif + unsigned int word_char : 1; +} re_token_t; +/* Nonzero when EPSILON_BIT is set in TYPE. Among node types that is OP_OPEN_SUBEXP .. ANCHOR; the token-only values 24 .. 31 carry the same bit, so this presumably is meant for node types only -- confirm. */ +#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) + +struct re_string_t +{ + /* Indicate the raw buffer which is the original string passed as an + argument of regexec(), re_search(), etc. */ + const unsigned char *raw_mbs; + /* Store the multibyte string. In case of "case insensitive mode" like + REG_ICASE, upper cases of the string are stored, otherwise MBS points + the same address that RAW_MBS points. */ + unsigned char *mbs; +#ifdef RE_ENABLE_I18N + /* Store the wide character string which is corresponding to MBS. */ + wint_t *wcs; + int *offsets; + mbstate_t cur_state; +#endif + /* Index in RAW_MBS. Each character mbs[i] corresponds to + raw_mbs[raw_mbs_idx + i]. */ + int raw_mbs_idx; + /* The length of the valid characters in the buffers. */ + int valid_len; + /* The corresponding number of bytes in raw_mbs array. */ + int valid_raw_len; + /* The length of the buffers MBS and WCS. */ + int bufs_len; + /* The index in MBS, which is updated by re_string_fetch_byte. */ + int cur_idx; + /* length of RAW_MBS array. 
*/ + int raw_len; + /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ + int len; + /* End of the buffer may be shorter than its length in the cases such + as re_match_2, re_search_2. Then, we use STOP for end of the buffer + instead of LEN. */ + int raw_stop; + /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ + int stop; + + /* The context of mbs[0]. We store the context independently, since + the context of mbs[0] may be different from raw_mbs[0], which is + the beginning of the input string. */ + unsigned int tip_context; + /* The translation passed as a part of an argument of re_compile_pattern. */ + RE_TRANSLATE_TYPE trans; + /* Copy of re_dfa_t's word_char. */ + re_const_bitset_ptr_t word_char; + /* 1 if REG_ICASE. */ + unsigned char icase; + unsigned char is_utf8; + unsigned char map_notascii; + unsigned char mbs_allocated; + unsigned char offsets_needed; + unsigned char newline_anchor; + unsigned char word_ops_used; + int mb_cur_max; +}; +typedef struct re_string_t re_string_t; + + +struct re_dfa_t; +typedef struct re_dfa_t re_dfa_t; +/* Outside glibc, internal functions get the regparm(3)/stdcall attributes on i386; on other targets the marker expands to nothing. */ +#ifndef _LIBC +# ifdef __i386__ +# define internal_function __attribute ((regparm (3), stdcall)) +# else +# define internal_function +# endif +#endif + +#ifndef NOT_IN_libc +static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, + int new_buf_len) + internal_function; +# ifdef RE_ENABLE_I18N +static void build_wcs_buffer (re_string_t *pstr) internal_function; +static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr) + internal_function; +# endif /* RE_ENABLE_I18N */ +static void build_upper_buffer (re_string_t *pstr) internal_function; +static void re_string_translate_buffer (re_string_t *pstr) internal_function; +static unsigned int re_string_context_at (const re_string_t *input, int idx, + int eflags) + internal_function __attribute ((pure)); +#endif +#define re_string_peek_byte(pstr, offset) \ + ((pstr)->mbs[(pstr)->cur_idx + (offset)]) /* byte at cur_idx + OFFSET; cur_idx is left unchanged */ +#define re_string_fetch_byte(pstr) \ + 
((pstr)->mbs[(pstr)->cur_idx++]) +#define re_string_first_byte(pstr, idx) \ + ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF) +#define re_string_is_single_byte_char(pstr, idx) \ + ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \ + || (pstr)->wcs[(idx) + 1] != WEOF)) +#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) /* true once cur_idx has reached STOP */ +#define re_string_cur_idx(pstr) ((pstr)->cur_idx) +#define re_string_get_buffer(pstr) ((pstr)->mbs) +#define re_string_length(pstr) ((pstr)->len) +#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) +#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) +#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) +/* Header declaring alloca: <malloc.h> on Windows toolchains, <alloca.h> elsewhere. */ +#ifdef WIN32 +# include <malloc.h> +#else +# include <alloca.h> +#endif + +#ifndef _LIBC +# if HAVE_ALLOCA +/* The OS usually guarantees only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + allocate anything larger than 4096 bytes. Also care for the possibility + of a few compiler-allocated temporary stack slots. */ +# define __libc_use_alloca(n) ((n) < 4032) +# else +/* alloca is implemented with malloc, so just use malloc. */ +# define __libc_use_alloca(n) 0 +# endif +#endif +/* Typed allocation helpers: N elements of type T. The byte count N * sizeof (T) is not checked for overflow. */ +#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) +#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) +#define re_free(p) free (p) + +struct bin_tree_t +{ + struct bin_tree_t *parent; + struct bin_tree_t *left; + struct bin_tree_t *right; + struct bin_tree_t *first; + struct bin_tree_t *next; + + re_token_t token; + + /* `node_idx' is the index in dfa->nodes, if `type' == 0. + Otherwise `type' indicates the type of this node. 
*/ + int node_idx; +}; +typedef struct bin_tree_t bin_tree_t; +/* Number of bin_tree_t entries per storage chunk, chosen so that one chunk (the NEXT pointer plus DATA) occupies at most about 1024 bytes. */ +#define BIN_TREE_STORAGE_SIZE \ + ((1024 - sizeof (void *)) / sizeof (bin_tree_t)) + +struct bin_tree_storage_t +{ + struct bin_tree_storage_t *next; + bin_tree_t data[BIN_TREE_STORAGE_SIZE]; +}; +typedef struct bin_tree_storage_t bin_tree_storage_t; +/* Context flag bits (1, 2, 4, 8) and their test macros. */ +#define CONTEXT_WORD 1 +#define CONTEXT_NEWLINE (CONTEXT_WORD << 1) +#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1) +#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1) + +#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD) +#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE) +#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF) +#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF) +#define IS_ORDINARY_CONTEXT(c) ((c) == 0) + +#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') +#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) +#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') +#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) +/* Nonzero when CONTEXT violates at least one PREV_* (resp. NEXT_*) flag set in CONSTRAINT. Every use of CONSTRAINT is parenthesized, matching the NEXT variant. */ +#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ + ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ + || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ + || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\ + || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context))) + +#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \ + ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ + || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ + || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \ + || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context))) + +struct re_dfastate_t +{ + unsigned int hash; + re_node_set nodes; + re_node_set non_eps_nodes; + re_node_set inveclosure; + re_node_set *entrance_nodes; + struct re_dfastate_t **trtable, **word_trtable; + unsigned int context : 4; + unsigned int halt : 1; + /* If this state can accept 
`multi byte'. + Note that we refer to multibyte characters, and multi character + collating elements as `multi byte'. */ + unsigned int accept_mb : 1; + /* If this state has backreference node(s). */ + unsigned int has_backref : 1; + unsigned int has_constraint : 1; +}; +typedef struct re_dfastate_t re_dfastate_t; +/* Element type of re_dfa_t.state_table; presumably one bucket of the state hash table -- confirm. */ +struct re_state_table_entry +{ + int num; + int alloc; + re_dfastate_t **array; +}; + +/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */ + +typedef struct +{ + int next_idx; + int alloc; + re_dfastate_t **array; +} state_array_t; + +/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */ + +typedef struct +{ + int node; + int str_idx; /* The position NODE matches at. */ + state_array_t path; +} re_sub_match_last_t; + +/* Store information about the node NODE whose type is OP_OPEN_SUBEXP. + And information about the node, whose type is OP_CLOSE_SUBEXP, + corresponding to NODE is stored in LASTS. */ + +typedef struct +{ + int str_idx; + int node; + state_array_t *path; + int alasts; /* Allocation size of LASTS. */ + int nlasts; /* The number of LASTS. */ + re_sub_match_last_t **lasts; +} re_sub_match_top_t; +/* Element type of re_match_context_t.bkref_ents (the back reference cache). */ +struct re_backref_cache_entry +{ + int node; + int str_idx; + int subexp_from; + int subexp_to; + char more; + char unused; + unsigned short int eps_reachable_subexps_map; +}; + +typedef struct +{ + /* The string object corresponding to the input string. */ + re_string_t input; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + const re_dfa_t *const dfa; +#else + const re_dfa_t *dfa; +#endif + /* EFLAGS of the argument of regexec. */ + int eflags; + /* Where the matching ends. */ + int match_last; + int last_node; + /* The state log used by the matcher. */ + re_dfastate_t **state_log; + int state_log_top; + /* Back reference cache. 
*/ + int nbkref_ents; /* presumably the number of entries in use -- confirm */ + int abkref_ents; /* presumably the allocated size, cf. alasts/nlasts -- confirm */ + struct re_backref_cache_entry *bkref_ents; + int max_mb_elem_len; + int nsub_tops; + int asub_tops; + re_sub_match_top_t **sub_tops; +} re_match_context_t; +/* Sifting context; the first four fields correspond to the arguments of sift_ctx_init declared in regexec.c. */ +typedef struct +{ + re_dfastate_t **sifted_states; + re_dfastate_t **limited_states; + int last_node; + int last_str_idx; + re_node_set limits; +} re_sift_context_t; +/* One entry of re_fail_stack_t.stack. */ +struct re_fail_stack_ent_t +{ + int idx; + int node; + regmatch_t *regs; + re_node_set eps_via_nodes; +}; + +struct re_fail_stack_t +{ + int num; + int alloc; + struct re_fail_stack_ent_t *stack; +}; + +struct re_dfa_t +{ + re_token_t *nodes; + size_t nodes_alloc; + size_t nodes_len; + int *nexts; + int *org_indices; + re_node_set *edests; + re_node_set *eclosures; + re_node_set *inveclosures; + struct re_state_table_entry *state_table; + re_dfastate_t *init_state; + re_dfastate_t *init_state_word; + re_dfastate_t *init_state_nl; + re_dfastate_t *init_state_begbuf; + bin_tree_t *str_tree; + bin_tree_storage_t *str_tree_storage; + re_bitset_ptr_t sb_char; + int str_tree_storage_idx; + + /* The number of subexpressions, `re_nsub', is kept in regex_t. */ + unsigned int state_hash_mask; + int init_node; + int nbackref; /* The number of backreferences in this dfa. */ + + /* Bitmap expressing which backreference is used. */ + bitset_word_t used_bkref_map; + bitset_word_t completed_bkref_map; + + unsigned int has_plural_match : 1; + /* If this dfa has "multibyte node", which is a backreference or + a node which can accept multibyte character or multi character + collating element. 
*/ + unsigned int has_mb_node : 1; + unsigned int is_utf8 : 1; + unsigned int map_notascii : 1; + unsigned int word_ops_used : 1; + int mb_cur_max; + bitset_t word_char; + reg_syntax_t syntax; + int *subexp_map; +#ifdef DEBUG + char* re_str; +#endif + __libc_lock_define (, lock) +}; +/* re_node_set helpers. re_node_set_remove subtracts 1 from the result of re_node_set_contains, so that function presumably returns a 1-based position -- confirm. */ +#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) +#define re_node_set_remove(set,id) \ + (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) +#define re_node_set_empty(p) ((p)->nelem = 0) /* drops the elements but keeps the storage */ +#define re_node_set_free(set) re_free ((set)->elems) /* frees only the element array */ + + +typedef enum +{ + SB_CHAR, + MB_CHAR, + EQUIV_CLASS, + COLL_SYM, + CHAR_CLASS +} bracket_elem_type; + +typedef struct +{ + bracket_elem_type type; + union + { + unsigned char ch; + unsigned char *name; + wchar_t wch; + } opr; +} bracket_elem_t; + + +/* Inline functions for bitset operations. bitset_not complements every word of SET in place. */ +static inline void +bitset_not (bitset_t set) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + set[bitset_i] = ~set[bitset_i]; +} +/* OR every word of SRC into DEST (set union, in place). */ +static inline void +bitset_merge (bitset_t dest, const bitset_t src) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] |= src[bitset_i]; +} +/* AND every word of SRC into DEST (set intersection, in place). */ +static inline void +bitset_mask (bitset_t dest, const bitset_t src) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] &= src[bitset_i]; +} + +#ifdef RE_ENABLE_I18N +/* Inline functions for re_string. 
*/ +static inline int +internal_function __attribute ((pure)) +re_string_char_size_at (const re_string_t *pstr, int idx) +{ + int byte_idx; + if (pstr->mb_cur_max == 1) /* single-byte locale: every character is one byte */ + return 1; + for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx) + if (pstr->wcs[idx + byte_idx] != WEOF) /* stop at the next position whose wcs entry is not WEOF */ + break; + return byte_idx; +} +/* Return the character at IDX as a wint_t: the byte mbs[idx] when mb_cur_max is 1, otherwise wcs[idx]. */ +static inline wint_t +internal_function __attribute ((pure)) +re_string_wchar_at (const re_string_t *pstr, int idx) +{ + if (pstr->mb_cur_max == 1) + return (wint_t) pstr->mbs[idx]; + return (wint_t) pstr->wcs[idx]; +} +/* Return how many bytes findidx consumes starting at IDX; always 1 outside glibc or when the locale reports no collation rules. */ +# ifndef NOT_IN_libc +static int +internal_function __attribute ((pure)) +re_string_elem_size_at (const re_string_t *pstr, int idx) +{ +# ifdef _LIBC + const unsigned char *p, *extra; + const int32_t *table, *indirect; + int32_t tmp; +# include <locale/weight.h> + uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + + if (nrules != 0) + { + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + p = pstr->mbs + idx; + tmp = findidx (&p); /* advances P; only the advance is used below */ + return p - pstr->mbs - idx; + } + else +# endif /* _LIBC */ + return 1; +} +# endif +#endif /* RE_ENABLE_I18N */ + +#endif /* _REGEX_INTERNAL_H */ diff --git a/gnu_regex/regexec.c b/gnu_regex/regexec.c new file mode 100644 index 0000000..560921d --- /dev/null +++ b/gnu_regex/regexec.c @@ -0,0 +1,4338 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2004, 2005, 2007, 2009 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, + int n) internal_function; +static void match_ctx_clean (re_match_context_t *mctx) internal_function; +static void match_ctx_free (re_match_context_t *cache) internal_function; +static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, + int str_idx, int from, int to) + internal_function; +static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) + internal_function; +static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, + int str_idx) internal_function; +static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, + int node, int str_idx) + internal_function; +static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, + int last_str_idx) + internal_function; +static reg_errcode_t re_search_internal (const regex_t *preg, + const char *string, int length, + int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags) internal_function; +static int re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, + int start, int range, struct re_registers *regs, + int stop, int ret_len) internal_function; +static int re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, struct re_registers *regs, + int ret_len) 
internal_function; +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, + int nregs, int regs_allocated) internal_function; +static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) + internal_function; +static int check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) internal_function; +static int check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) + internal_function; +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, + int cur_idx, int nmatch) internal_function; +static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, + int str_idx, int dest_node, int nregs, + regmatch_t *regs, + re_node_set *eps_via_nodes) + internal_function; +static reg_errcode_t set_regs (const regex_t *preg, + const re_match_context_t *mctx, + size_t nmatch, regmatch_t *pmatch, + int fl_backtrack) internal_function; +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) + internal_function; + +#ifdef RE_ENABLE_I18N +static int sift_states_iter_mb (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, + re_sift_context_t *sctx) + internal_function; +static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *cur_dest) + internal_function; +static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, + re_node_set *dest_nodes) + internal_function; +static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates) + internal_function; +static int check_dst_limits (const re_match_context_t *mctx, + re_node_set *limits, + int dst_node, 
int dst_idx, int src_node, + int src_idx) internal_function; +static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, + int boundaries, int subexp_idx, + int from_node, int bkref_idx) + internal_function; +static int check_dst_limits_calc_pos (const re_match_context_t *mctx, + int limit, int subexp_idx, + int node, int str_idx, + int bkref_idx) internal_function; +static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates, + re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, + int str_idx) internal_function; +static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) + internal_function; +static reg_errcode_t merge_state_array (const re_dfa_t *dfa, + re_dfastate_t **dst, + re_dfastate_t **src, int num) + internal_function; +static re_dfastate_t *find_recover_state (reg_errcode_t *err, + re_match_context_t *mctx) internal_function; +static re_dfastate_t *transit_state (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *state) internal_function; +static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *next_state) + internal_function; +static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, + re_node_set *cur_nodes, + int str_idx) internal_function; +#if 0 +static re_dfastate_t *transit_state_sb (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif +#ifdef RE_ENABLE_I18N +static reg_errcode_t transit_state_mb (re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, + const re_node_set *nodes) + internal_function; +static reg_errcode_t get_subexp (re_match_context_t *mctx, + int bkref_node, int bkref_str_idx) + internal_function; +static 
reg_errcode_t get_subexp_sub (re_match_context_t *mctx, + const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, + int bkref_node, int bkref_str) + internal_function; +static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) internal_function; +static reg_errcode_t check_arrival (re_match_context_t *mctx, + state_array_t *path, int top_node, + int top_str, int last_node, int last_str, + int type) internal_function; +static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, + int str_idx, + re_node_set *cur_nodes, + re_node_set *next_nodes) + internal_function; +static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, + re_node_set *cur_nodes, + int ex_subexp, int type) + internal_function; +static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, + re_node_set *dst_nodes, + int target, int ex_subexp, + int type) internal_function; +static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, + re_node_set *cur_nodes, int cur_str, + int subexp_num, int type) + internal_function; +static int build_trtable (const re_dfa_t *dfa, + re_dfastate_t *state) internal_function; +#ifdef RE_ENABLE_I18N +static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int idx) + internal_function; +# ifdef _LIBC +static unsigned int find_collation_sequence_value (const unsigned char *mbs, + size_t name_len) + internal_function; +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ +static int group_nodes_into_DFAstates (const re_dfa_t *dfa, + const re_dfastate_t *state, + re_node_set *states_node, + bitset_t *states_ch) internal_function; +static int check_node_accept (const re_match_context_t *mctx, + const re_token_t *node, int idx) + internal_function; +static reg_errcode_t extend_buffers (re_match_context_t *mctx) + internal_function; + +/* Entry point for POSIX code. 
*/
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we do not write to PMATCH.  Otherwise, we assume PMATCH
+   has at least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end; if
+   REG_STARTEND is set, the search starts at PMATCH[0].rm_so and the
+   string is taken to end at PMATCH[0].rm_eo instead of being measured
+   with strlen.  In that case PMATCH[0] is read as input even when
+   NMATCH is zero or REG_NOSUB is in effect.  Any other bit set in
+   EFLAGS makes the call return REG_BADPAT without searching.
+
+   We return 0 if we find a match and 1 if not; every failure of the
+   internal search is folded into that single nonzero value.  */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+    const regex_t *__restrict preg;
+    const char *__restrict string;
+    size_t nmatch;
+    regmatch_t pmatch[];
+    int eflags;
+{
+  reg_errcode_t err;
+  int start, length;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+
+  /* Reject execution flags we do not know about.  */
+  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
+    return REG_BADPAT;
+
+  if (eflags & REG_STARTEND)
+    {
+      /* The caller supplies the window to search in PMATCH[0].  */
+      start = pmatch[0].rm_so;
+      length = pmatch[0].rm_eo;
+    }
+  else
+    {
+      start = 0;
+      length = strlen (string);
+    }
+
+  __libc_lock_lock (dfa->lock);
+  /* With no_sub set, no registers are requested from the matcher.  */
+  if (preg->no_sub)
+    err = re_search_internal (preg, string, length, start, length - start,
+                              length, 0, NULL, eflags);
+  else
+    err = re_search_internal (preg, string, length, start, length - start,
+                              length, nmatch, pmatch, eflags);
+  __libc_lock_unlock (dfa->lock);
+  return err != REG_NOERROR;
+}
+
+#ifdef _LIBC
+/* Declares the symbol-versioning macros used below.  */
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+/* Variant bound to the GLIBC_2_0 symbol version: it forwards to
+   regexec with every flag other than REG_NOTBOL and REG_NOTEOL
+   masked out of EFLAGS.  */
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+                  const char *__restrict string, size_t nmatch,
+                  regmatch_t pmatch[], int eflags)
+{
+  return regexec (preg, string, nmatch, pmatch,
+                  eflags & (REG_NOTBOL | REG_NOTEOL));
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU code.
*/ + +/* re_match, re_search, re_match_2, re_search_2 + + The former two functions operate on STRING with length LENGTH, + while the latter two operate on the concatenation of STRING1 and STRING2 + with lengths LENGTH1 and LENGTH2, respectively. + + re_match() matches the compiled pattern in BUFP against the string, + starting at index START. + + re_search() first tries matching at index START, then it tries to match + starting from index START + 1, and so on. The last start position tried + is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same + way as re_match().) + + The parameter STOP of re_{match,search}_2 specifies that no match exceeding + the first STOP characters of the concatenation of the strings is + considered. + + If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match + and all groups are stored in REGS. (For the "_2" variants, the offsets are + computed relative to the concatenation, not relative to the individual + strings.) + + On success, re_match* functions return the length of the match, re_search* + return the position of the start of the match. Return value -1 means no + match was found and -2 indicates an internal error. 
*/
+
+/* Match at START only; return the length of the match.  */
+int
+re_match (bufp, string, length, start, regs)
+    struct re_pattern_buffer *bufp;
+    const char *string;
+    int length, start;
+    struct re_registers *regs;
+{
+  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+/* Search from START over RANGE; return the position of the match.  */
+int
+re_search (bufp, string, length, start, range, regs)
+    struct re_pattern_buffer *bufp;
+    const char *string;
+    int length, start, range;
+    struct re_registers *regs;
+{
+  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+/* Like re_match, on the concatenation of STRING1 and STRING2.  */
+int
+re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, stop;
+    struct re_registers *regs;
+{
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, 0, regs, stop, 1);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+/* Like re_search, on the concatenation of STRING1 and STRING2.  */
+int
+re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, range, stop;
+    struct re_registers *regs;
+{
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+/* Common worker for the "_2" entry points.  When both strings are
+   non-empty they are copied into one temporary buffer, which is
+   released before returning; otherwise the non-empty one (or STRING1)
+   is searched in place.  Returns what re_search_stub returns, or -2
+   if a length or STOP is negative or the buffer cannot be allocated.  */
+static int
+re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
+                  stop, ret_len)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, range, stop, ret_len;
+    struct re_registers *regs;
+{
+  const char *str;
+  int rval;
+  int len = length1 + length2;
+  int free_str = 0;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+
+  /* Concatenate the strings.  */
+  if (length2 > 0)
+    if (length1 > 0)
+      {
+        char *s = re_malloc (char, len);
+
+        if (BE (s == NULL, 0))
+          return -2;
+#ifdef _LIBC
+        memcpy (__mempcpy (s, string1, length1), string2, length2);
+#else
+        memcpy (s, string1, length1);
+        memcpy (s + length1, string2, length2);
+#endif
+        str = s;
+        free_str = 1;
+      }
+    else
+      str = string2;
+  else
+    str = string1;
+
+  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+                         ret_len);
+  if (free_str)
+    re_free ((char *) str);
+  return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+   Additional parameters:
+   If RET_LEN is nonzero the length of the match is returned (re_match style);
+   otherwise the position of the match is returned.
+   Returns -1 if START is out of range or no match is found, and -2 if
+   the search fails for any other reason (including failure to allocate
+   the match or register arrays).  */
+
+static int
+re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
+    struct re_pattern_buffer *bufp;
+    const char *string;
+    int length, start, range, stop, ret_len;
+    struct re_registers *regs;
+{
+  reg_errcode_t result;
+  regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length, 0))
+    return -1;
+  /* Clamp RANGE so that START + RANGE stays within [0, LENGTH].  */
+  if (BE (start + range > length, 0))
+    range = length - start;
+  else if (BE (start + range < 0, 0))
+    range = -start;
+
+  __libc_lock_lock (dfa->lock);
+
+  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
+
+  /* We need at least 1 register.  */
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+               regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      /* Fixed-size caller arrays: never produce more than they hold.  */
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+        {
+          /* Nothing can be copied to regs.  */
+          regs = NULL;
+          nregs = 1;
+        }
+    }
+  else
+    nregs = bufp->re_nsub + 1;
+  pmatch = re_malloc (regmatch_t, nregs);
+  if (BE (pmatch == NULL, 0))
+    {
+      rval = -2;
+      goto out;
+    }
+
+  result = re_search_internal (bufp, string, length, start, range, stop,
+                               nregs, pmatch, eflags);
+
+  rval = 0;
+
+  /* I hope we needn't fill the regs with -1's when no match was found.  */
+  if (result != REG_NOERROR)
+    /* Only a plain mismatch is "no match"; anything else is an
+       internal error and is reported as -2.  */
+    rval = (result == REG_NOMATCH) ? -1 : -2;
+  else if (regs != NULL)
+    {
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+                                           bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+        rval = -2;
+    }
+
+  if (BE (rval == 0, 1))
+    {
+      if (ret_len)
+        {
+          assert (pmatch[0].rm_so == start);
+          rval = pmatch[0].rm_eo - start;
+        }
+      else
+        rval = pmatch[0].rm_so;
+    }
+  re_free (pmatch);
+ out:
+  __libc_lock_unlock (dfa->lock);
+  return rval;
+}
+
+/* Copy the NREGS entries of PMATCH into REGS, allocating or enlarging
+   the arrays in REGS according to REGS_ALLOCATED, and set every
+   remaining slot up to REGS->num_regs to -1.
+   Returns the new allocation state (REGS_REALLOCATE or REGS_FIXED),
+   or REGS_UNALLOCATED if memory could not be obtained.  On such a
+   failure nothing allocated here is leaked and the pointers left in
+   REGS are never ones that realloc has already released.  */
+static unsigned
+re_copy_regs (regs, pmatch, nregs, regs_allocated)
+    struct re_registers *regs;
+    regmatch_t *pmatch;
+    int nregs, regs_allocated;
+{
+  int rval = REGS_REALLOCATE;
+  int i;
+  int need_regs = nregs + 1;
+  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+     uses.  */
+
+  /* Have the register data arrays been allocated?  */
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* No.  So allocate them with malloc.  */
+      regs->start = re_malloc (regoff_t, need_regs);
+      if (BE (regs->start == NULL, 0))
+        return REGS_UNALLOCATED;
+      regs->end = re_malloc (regoff_t, need_regs);
+      if (BE (regs->end == NULL, 0))
+        {
+          /* Do not leak the array that was obtained.  */
+          re_free (regs->start);
+          regs->start = NULL;
+          return REGS_UNALLOCATED;
+        }
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Yes.  If we need more elements than were already
+         allocated, reallocate them.  If we need fewer, just
+         leave it alone.  */
+      if (BE (need_regs > regs->num_regs, 0))
+        {
+          regoff_t *new_start;
+          regoff_t *new_end;
+
+          new_start = re_realloc (regs->start, regoff_t, need_regs);
+          if (BE (new_start == NULL, 0))
+            return REGS_UNALLOCATED;
+          /* The old block may be gone now, so record the new one at
+             once; num_regs is only raised when both arrays have grown.  */
+          regs->start = new_start;
+
+          new_end = re_realloc (regs->end, regoff_t, need_regs);
+          if (BE (new_end == NULL, 0))
+            return REGS_UNALLOCATED;
+          regs->end = new_end;
+          regs->num_regs = need_regs;
+        }
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+
+  return rval;
+}
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
+   this memory for recording register information.  STARTS and ENDS
+   must be allocated using the malloc library routine, and must each
+   be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+    struct re_pattern_buffer *bufp;
+    struct re_registers *regs;
+    unsigned num_regs;
+    regoff_t *starts, *ends;
+{
+  if (num_regs)
+    {
+      bufp->regs_allocated = REGS_REALLOCATE;
+      regs->num_regs = num_regs;
+      regs->start = starts;
+      regs->end = ends;
+    }
+  else
+    {
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->start = regs->end = (regoff_t *) 0;
+    }
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.
*/ + +#if defined _REGEX_RE_COMP || defined _LIBC +int +# ifdef _LIBC +weak_function +# endif +re_exec (s) + const char *s; +{ + return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); +} +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. */ + +/* Searches for a compiled pattern PREG in the string STRING, whose + length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same + mingings with regexec. START, and RANGE have the same meanings + with re_search. + Return REG_NOERROR if we find a match, and REG_NOMATCH if not, + otherwise return the error code. + Note: We assume front end functions already check ranges. + (START + RANGE >= 0 && START + RANGE <= LENGTH) */ + +static reg_errcode_t +re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, + eflags) + const regex_t *preg; + const char *string; + int length, start, range, stop, eflags; + size_t nmatch; + regmatch_t pmatch[]; +{ + reg_errcode_t err; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int left_lim, right_lim, incr; + int fl_longest_match, match_first, match_kind, match_last = -1; + int extra_nmatch; + int sb, ch; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + re_match_context_t mctx = { .dfa = dfa }; +#else + re_match_context_t mctx; +#endif + char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate + && range && !preg->can_be_null) ? preg->fastmap : NULL; + RE_TRANSLATE_TYPE t = preg->translate; + +#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) + memset (&mctx, '\0', sizeof (re_match_context_t)); + mctx.dfa = dfa; +#endif + + extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; + nmatch -= extra_nmatch; + + /* Check if the DFA haven't been compiled. 
*/ + if (BE (preg->used == 0 || dfa->init_state == NULL + || dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return REG_NOMATCH; + +#ifdef DEBUG + /* We assume front-end functions already check them. */ + assert (start + range >= 0 && start + range <= length); +#endif + + /* If initial states with non-begbuf contexts have no elements, + the regex must be anchored. If preg->newline_anchor is set, + we'll never use init_state_nl, so do not check it. */ + if (dfa->init_state->nodes.nelem == 0 + && dfa->init_state_word->nodes.nelem == 0 + && (dfa->init_state_nl->nodes.nelem == 0 + || !preg->newline_anchor)) + { + if (start != 0 && start + range != 0) + return REG_NOMATCH; + start = range = 0; + } + + /* We must check the longest matching, if nmatch > 0. */ + fl_longest_match = (nmatch != 0 || dfa->nbackref); + + err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, + preg->translate, preg->syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + mctx.input.stop = stop; + mctx.input.raw_stop = stop; + mctx.input.newline_anchor = preg->newline_anchor; + + err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* We will log all the DFA states through which the dfa pass, + if nmatch > 1, or this dfa has "multibyte node", which is a + back-reference or a node which can accept multibyte character or + multi character collating element. */ + if (nmatch > 1 || dfa->has_mb_node) + { + mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); + if (BE (mctx.state_log == NULL, 0)) + { + err = REG_ESPACE; + goto free_return; + } + } + else + mctx.state_log = NULL; + + match_first = start; + mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF; + + /* Check incrementally whether of not the input string match. */ + incr = (range < 0) ? 
-1 : 1; + left_lim = (range < 0) ? start + range : start; + right_lim = (range < 0) ? start : start + range; + sb = dfa->mb_cur_max == 1; + match_kind = + (fastmap + ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) + | (range >= 0 ? 2 : 0) + | (t != NULL ? 1 : 0)) + : 8); + + for (;; match_first += incr) + { + err = REG_NOMATCH; + if (match_first < left_lim || right_lim < match_first) + goto free_return; + + /* Advance as rapidly as possible through the string, until we + find a plausible place to start matching. This may be done + with varying efficiency, so there are various possibilities: + only the most common of them are specialized, in order to + save on code size. We use a switch statement for speed. */ + switch (match_kind) + { + case 8: + /* No fastmap. */ + break; + + case 7: + /* Fastmap with single-byte translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[t[(unsigned char) string[match_first]]]) + ++match_first; + goto forward_match_found_start_or_reached_end; + + case 6: + /* Fastmap without translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[(unsigned char) string[match_first]]) + ++match_first; + + forward_match_found_start_or_reached_end: + if (BE (match_first == right_lim, 0)) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (!fastmap[t ? t[ch] : ch]) + goto free_return; + } + break; + + case 4: + case 5: + /* Fastmap without multi-byte translation, match backwards. */ + while (match_first >= left_lim) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (fastmap[t ? t[ch] : ch]) + break; + --match_first; + } + if (match_first < left_lim) + goto free_return; + break; + + default: + /* In this case, we can't determine easily the current byte, + since it might be a component byte of a multibyte + character. Then we use the constructed buffer instead. 
*/ + for (;;) + { + /* If MATCH_FIRST is out of the valid range, reconstruct the + buffers. */ + unsigned int offset = match_first - mctx.input.raw_mbs_idx; + if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) + { + err = re_string_reconstruct (&mctx.input, match_first, + eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + offset = match_first - mctx.input.raw_mbs_idx; + } + /* If MATCH_FIRST is out of the buffer, leave it as '\0'. + Note that MATCH_FIRST must not be smaller than 0. */ + ch = (match_first >= length + ? 0 : re_string_byte_at (&mctx.input, offset)); + if (fastmap[ch]) + break; + match_first += incr; + if (match_first < left_lim || match_first > right_lim) + { + err = REG_NOMATCH; + goto free_return; + } + } + break; + } + + /* Reconstruct the buffers so that the matcher can assume that + the matching starts from the beginning of the buffer. */ + err = re_string_reconstruct (&mctx.input, match_first, eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + +#ifdef RE_ENABLE_I18N + /* Don't consider this char as a possible match start if it part, + yet isn't the head, of a multibyte character. */ + if (!sb && !re_string_first_byte (&mctx.input, 0)) + continue; +#endif + + /* It seems to be appropriate one, then use the matcher. */ + /* We assume that the matching starts from 0. */ + mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; + match_last = check_matching (&mctx, fl_longest_match, + range >= 0 ? 
&match_first : NULL); + if (match_last != -1) + { + if (BE (match_last == -2, 0)) + { + err = REG_ESPACE; + goto free_return; + } + else + { + mctx.match_last = match_last; + if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) + { + re_dfastate_t *pstate = mctx.state_log[match_last]; + mctx.last_node = check_halt_state_context (&mctx, pstate, + match_last); + } + if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) + || dfa->nbackref) + { + err = prune_impossible_nodes (&mctx); + if (err == REG_NOERROR) + break; + if (BE (err != REG_NOMATCH, 0)) + goto free_return; + match_last = -1; + } + else + break; /* We found a match. */ + } + } + + match_ctx_clean (&mctx); + } + +#ifdef DEBUG + assert (match_last != -1); + assert (err == REG_NOERROR); +#endif + + /* Set pmatch[] if we need. */ + if (nmatch > 0) + { + int reg_idx; + + /* Initialize registers. */ + for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) + pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; + + /* Set the points where matching start/end. */ + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = mctx.match_last; + + if (!preg->no_sub && nmatch > 1) + { + err = set_regs (preg, &mctx, nmatch, pmatch, + dfa->has_plural_match && dfa->nbackref > 0); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* At last, add the offset to the each registers, since we slided + the buffers so that we could assume that the matching starts + from 0. */ + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so != -1) + { +#ifdef RE_ENABLE_I18N + if (BE (mctx.input.offsets_needed != 0, 0)) + { + pmatch[reg_idx].rm_so = + (pmatch[reg_idx].rm_so == mctx.input.valid_len + ? mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_so]); + pmatch[reg_idx].rm_eo = + (pmatch[reg_idx].rm_eo == mctx.input.valid_len + ? 
mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_eo]); + } +#else + assert (mctx.input.offsets_needed == 0); +#endif + pmatch[reg_idx].rm_so += match_first; + pmatch[reg_idx].rm_eo += match_first; + } + for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) + { + pmatch[nmatch + reg_idx].rm_so = -1; + pmatch[nmatch + reg_idx].rm_eo = -1; + } + + if (dfa->subexp_map) + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) + if (dfa->subexp_map[reg_idx] != reg_idx) + { + pmatch[reg_idx + 1].rm_so + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; + pmatch[reg_idx + 1].rm_eo + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; + } + } + + free_return: + re_free (mctx.state_log); + if (dfa->nbackref) + match_ctx_free (&mctx); + re_string_destruct (&mctx.input); + return err; +} + +static reg_errcode_t +prune_impossible_nodes (mctx) + re_match_context_t *mctx; +{ + const re_dfa_t *const dfa = mctx->dfa; + int halt_node, match_last; + reg_errcode_t ret; + re_dfastate_t **sifted_states; + re_dfastate_t **lim_states = NULL; + re_sift_context_t sctx; +#ifdef DEBUG + assert (mctx->state_log != NULL); +#endif + match_last = mctx->match_last; + halt_node = mctx->last_node; + sifted_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (sifted_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + if (dfa->nbackref) + { + lim_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (lim_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + while (1) + { + memset (lim_states, '\0', + sizeof (re_dfastate_t *) * (match_last + 1)); + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, + match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] != NULL || lim_states[0] != NULL) + break; + do + { + --match_last; + if (match_last < 0) + { + ret = REG_NOMATCH; + goto free_return; + } + } while 
(mctx->state_log[match_last] == NULL + || !mctx->state_log[match_last]->halt); + halt_node = check_halt_state_context (mctx, + mctx->state_log[match_last], + match_last); + } + ret = merge_state_array (dfa, sifted_states, lim_states, + match_last + 1); + re_free (lim_states); + lim_states = NULL; + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + } + else + { + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] == NULL) + { + ret = REG_NOMATCH; + goto free_return; + } + } + re_free (mctx->state_log); + mctx->state_log = sifted_states; + sifted_states = NULL; + mctx->last_node = halt_node; + mctx->match_last = match_last; + ret = REG_NOERROR; + free_return: + re_free (sifted_states); + re_free (lim_states); + return ret; +} + +/* Acquire an initial state and return it. + We must select appropriate initial state depending on the context, + since initial states may have constraints like "\<", "^", etc.. */ + +static inline re_dfastate_t * +__attribute ((always_inline)) internal_function +acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, + int idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + if (dfa->init_state->has_constraint) + { + unsigned int context; + context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return dfa->init_state_word; + else if (IS_ORDINARY_CONTEXT (context)) + return dfa->init_state; + else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_begbuf; + else if (IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_nl; + else if (IS_BEGBUF_CONTEXT (context)) + { + /* It is relatively rare case, then calculate on demand. */ + return re_acquire_state_context (err, dfa, + dfa->init_state->entrance_nodes, + context); + } + else + /* Must not happen? 
*/ + return dfa->init_state; + } + else + return dfa->init_state; +} + +/* Check whether the regular expression match input string INPUT or not, + and return the index where the matching end, return -1 if not match, + or return -2 in case of an error. + FL_LONGEST_MATCH means we want the POSIX longest matching. + If P_MATCH_FIRST is not NULL, and the match fails, it is set to the + next place where we may want to try matching. + Note that the matcher assume that the maching starts from the current + index of the buffer. */ + +static int +internal_function +check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int match = 0; + int match_last = -1; + int cur_str_idx = re_string_cur_idx (&mctx->input); + re_dfastate_t *cur_state; + int at_init_state = p_match_first != NULL; + int next_start_idx = cur_str_idx; + + err = REG_NOERROR; + cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); + /* An initial state must not be NULL (invalid). */ + if (BE (cur_state == NULL, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + + if (mctx->state_log != NULL) + { + mctx->state_log[cur_str_idx] = cur_state; + + /* Check OP_OPEN_SUBEXP in the initial state in case that we use them + later. E.g. Processing back references. */ + if (BE (dfa->nbackref, 0)) + { + at_init_state = 0; + err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (cur_state->has_backref) + { + err = transit_state_bkref (mctx, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + + /* If the RE accepts NULL string. 
*/ + if (BE (cur_state->halt, 0)) + { + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, cur_str_idx)) + { + if (!fl_longest_match) + return cur_str_idx; + else + { + match_last = cur_str_idx; + match = 1; + } + } + } + + while (!re_string_eoi (&mctx->input)) + { + re_dfastate_t *old_state = cur_state; + int next_char_idx = re_string_cur_idx (&mctx->input) + 1; + + if (BE (next_char_idx >= mctx->input.bufs_len, 0) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + } + + cur_state = transit_state (&err, mctx, cur_state); + if (mctx->state_log != NULL) + cur_state = merge_state_with_log (&err, mctx, cur_state); + + if (cur_state == NULL) + { + /* Reached the invalid state or an error. Try to recover a valid + state using the state log, if available and if we have not + already found a valid (even if not the longest) match. */ + if (BE (err != REG_NOERROR, 0)) + return -2; + + if (mctx->state_log == NULL + || (match && !fl_longest_match) + || (cur_state = find_recover_state (&err, mctx)) == NULL) + break; + } + + if (BE (at_init_state, 0)) + { + if (old_state == cur_state) + next_start_idx = next_char_idx; + else + at_init_state = 0; + } + + if (cur_state->halt) + { + /* Reached a halt state. + Check the halt state can satisfy the current context. */ + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, + re_string_cur_idx (&mctx->input))) + { + /* We found an appropriate halt state. */ + match_last = re_string_cur_idx (&mctx->input); + match = 1; + + /* We found a match, do not modify match_first below. */ + p_match_first = NULL; + if (!fl_longest_match) + break; + } + } + } + + if (p_match_first) + *p_match_first += next_start_idx; + + return match_last; +} + +/* Check NODE match the current context. 
*/
+
+static int
+internal_function
+check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
+{
+  re_token_type_t type = dfa->nodes[node].type;
+  unsigned int constraint = dfa->nodes[node].constraint;
+  /* Only an END_OF_RE node can halt.  */
+  if (type != END_OF_RE)
+    return 0;
+  /* An unconstrained END_OF_RE node matches in any context.  */
+  if (!constraint)
+    return 1;
+  if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
+    return 0;
+  return 1;
+}
+
+/* Check the halt state STATE match the current context.
+   Return 0 if not match, if the node, STATE has, is a halt node and
+   match the context, return the node.  */
+
+static int
+internal_function
+check_halt_state_context (const re_match_context_t *mctx,
+                          const re_dfastate_t *state, int idx)
+{
+  int i;
+  unsigned int context;
+#ifdef DEBUG
+  assert (state->halt);
+#endif
+  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
+  /* The first node of STATE that is acceptable in this context wins.  */
+  for (i = 0; i < state->nodes.nelem; ++i)
+    if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
+      return state->nodes.elems[i];
+  return 0;
+}
+
+/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
+   corresponding to the DFA).
+   Return the destination node, and update EPS_VIA_NODES.  Return -1 if
+   there is no valid destination, and -2 if inserting into EPS_VIA_NODES
+   or pushing onto the fail stack FS fails.  */
+
+static int
+internal_function
+proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
+                   int *pidx, int node, re_node_set *eps_via_nodes,
+                   struct re_fail_stack_t *fs)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int i, err;
+  if (IS_EPSILON_NODE (dfa->nodes[node].type))
+    {
+      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
+      re_node_set *edests = &dfa->edests[node];
+      int dest_node;
+      err = re_node_set_insert (eps_via_nodes, node);
+      if (BE (err < 0, 0))
+        return -2;
+      /* Pick up a valid destination, or return -1 if none is found.  */
+      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
+        {
+          int candidate = edests->elems[i];
+          if (!re_node_set_contains (cur_nodes, candidate))
+            continue;
+          if (dest_node == -1)
+            dest_node = candidate;
+
+          else
+            {
+              /* In order to avoid infinite loop like "(a*)*", return the second
+                 epsilon-transition if the first was already considered.  */
+              if (re_node_set_contains (eps_via_nodes, dest_node))
+                return candidate;
+
+              /* Otherwise, push the second epsilon-transition on the fail stack.  */
+              else if (fs != NULL
+                       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
+                                           eps_via_nodes))
+                return -2;
+
+              /* We know we are going to exit.  */
+              break;
+            }
+        }
+      return dest_node;
+    }
+  else
+    {
+      int naccepted = 0;
+      re_token_type_t type = dfa->nodes[node].type;
+
+#ifdef RE_ENABLE_I18N
+      if (dfa->nodes[node].accept_mb)
+        naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
+      else
+#endif /* RE_ENABLE_I18N */
+      if (type == OP_BACK_REF)
+        {
+          int subexp_idx = dfa->nodes[node].opr.idx + 1;
+          /* A back reference consumes as many bytes as the group it
+             refers to has recorded in REGS.  */
+          naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
+          if (fs != NULL)
+            {
+              if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
+                return -1;
+              else if (naccepted)
+                {
+                  char *buf = (char *) re_string_get_buffer (&mctx->input);
+                  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+                              naccepted) != 0)
+                    return -1;
+                }
+            }
+
+          if (naccepted == 0)
+            {
+              /* An empty back reference is followed like an epsilon
+                 transition.  */
+              int dest_node;
+              err = re_node_set_insert (eps_via_nodes, node);
+              if (BE (err < 0, 0))
+                return -2;
+              dest_node = dfa->edests[node].elems[0];
+              if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+                                        dest_node))
+                return dest_node;
+            }
+        }
+
+      if (naccepted != 0
+          || check_node_accept (mctx, dfa->nodes + node, *pidx))
+        {
+          int dest_node = dfa->nexts[node];
+          *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
+          if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
+                     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+                                               dest_node)))
+            return -1;
+          re_node_set_empty (eps_via_nodes);
+          return dest_node;
+        }
+    }
+  return -1;
+}
+
+/* Push a backtracking point onto FS: the string index STR_IDX, the
+   node DEST_NODE to resume at, a private copy of the NREGS registers
+   in REGS and a copy of EPS_VIA_NODES.
+   Returns REG_NOERROR on success.  On failure an error code is
+   returned and FS->num is left unchanged, so the stack never counts an
+   entry whose fields were not all initialized.  */
+static reg_errcode_t
+internal_function
+push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
+                 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+  reg_errcode_t err;
+  int num = fs->num;
+  /* Grow while one free slot is still left beyond the new entry.  */
+  if (num + 1 == fs->alloc)
+    {
+      struct re_fail_stack_ent_t *new_array;
+      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
+                                       * fs->alloc * 2));
+      if (new_array == NULL)
+        return REG_ESPACE;
+      fs->alloc *= 2;
+      fs->stack = new_array;
+    }
+  fs->stack[num].idx = str_idx;
+  fs->stack[num].node = dest_node;
+  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
+  if (fs->stack[num].regs == NULL)
+    return REG_ESPACE;
+  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
+  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* The entry is not going to be counted; release its registers.  */
+      re_free (fs->stack[num].regs);
+      fs->stack[num].regs = NULL;
+      return err;
+    }
+  /* Publish the entry only now that every field is valid.  */
+  fs->num = num + 1;
+  return REG_NOERROR;
+}
+
+/* Pop the most recent backtracking point from FS.  Restores *PIDX and
+   the NREGS registers in REGS, releases the previous contents of
+   EPS_VIA_NODES and replaces them with the saved set (ownership moves
+   to the caller), and returns the saved node.  */
+static int
+internal_function
+pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+                regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+  int num = --fs->num;
+  assert (num >= 0);
+  *pidx = fs->stack[num].idx;
+  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
+  re_node_set_free (eps_via_nodes);
+  re_free (fs->stack[num].regs);
+  *eps_via_nodes = fs->stack[num].eps_via_nodes;
+  return fs->stack[num].node;
+}
+
+/* Set the positions where the subexpressions are starts/ends to registers
+   PMATCH.
+   Note: We assume that pmatch[0] is already set, and
+   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.
*/ + +static reg_errcode_t +internal_function +set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, + regmatch_t *pmatch, int fl_backtrack) +{ + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int idx, cur_node; + re_node_set eps_via_nodes; + struct re_fail_stack_t *fs; + struct re_fail_stack_t fs_body = { 0, 2, NULL }; + regmatch_t *prev_idx_match; + int prev_idx_match_malloced = 0; + +#ifdef DEBUG + assert (nmatch > 1); + assert (mctx->state_log != NULL); +#endif + if (fl_backtrack) + { + fs = &fs_body; + fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); + if (fs->stack == NULL) + return REG_ESPACE; + } + else + fs = NULL; + + cur_node = dfa->init_node; + re_node_set_init_empty (&eps_via_nodes); + + if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) + prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); + else + { + prev_idx_match = re_malloc (regmatch_t, nmatch); + if (prev_idx_match == NULL) + { + free_fail_stack_return (fs); + return REG_ESPACE; + } + prev_idx_match_malloced = 1; + } + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + + for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) + { + update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); + + if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) + { + int reg_idx; + if (fs) + { + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) + break; + if (reg_idx == nmatch) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); + } + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + } + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOERROR; + } + } + + /* Proceed to next node. 
*/ + cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, + &eps_via_nodes, fs); + + if (BE (cur_node < 0, 0)) + { + if (BE (cur_node == -2, 0)) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + free_fail_stack_return (fs); + return REG_ESPACE; + } + if (fs) + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOMATCH; + } + } + } + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); +} + +static reg_errcode_t +internal_function +free_fail_stack_return (struct re_fail_stack_t *fs) +{ + if (fs) + { + int fs_idx; + for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) + { + re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); + re_free (fs->stack[fs_idx].regs); + } + re_free (fs->stack); + } + return REG_NOERROR; +} + +static void +internal_function +update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) +{ + int type = dfa->nodes[cur_node].type; + if (type == OP_OPEN_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + + /* We are at the first node of this sub expression. */ + if (reg_num < nmatch) + { + pmatch[reg_num].rm_so = cur_idx; + pmatch[reg_num].rm_eo = -1; + } + } + else if (type == OP_CLOSE_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + if (reg_num < nmatch) + { + /* We are at the last node of this sub expression. */ + if (pmatch[reg_num].rm_so < cur_idx) + { + pmatch[reg_num].rm_eo = cur_idx; + /* This is a non-empty match or we are not inside an optional + subexpression. Accept this right away. 
*/ + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + } + else + { + if (dfa->nodes[cur_node].opt_subexp + && prev_idx_match[reg_num].rm_so != -1) + /* We transited through an empty match for an optional + subexpression, like (a?)*, and this is not the subexp's + first match. Copy back the old content of the registers + so that matches of an inner subexpression are undone as + well, like in ((a?))*. */ + memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); + else + /* We completed a subexpression, but it may be part of + an optional one, so do not update PREV_IDX_MATCH. */ + pmatch[reg_num].rm_eo = cur_idx; + } + } + } +} + +/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 + and sift the nodes in each states according to the following rules. + Updated state_log will be wrote to STATE_LOG. + + Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + 1. When STR_IDX == MATCH_LAST(the last index in the state_log): + If `a' isn't the LAST_NODE and `a' can't epsilon transit to + the LAST_NODE, we throw away the node `a'. + 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts + string `s' and transit to `b': + i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw + away the node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is + thrown away, we throw away the node `a'. + 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': + i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the + node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, + we throw away the node `a'. 
*/ + +#define STATE_NODE_CONTAINS(state,node) \ + ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) + +static reg_errcode_t +internal_function +sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) +{ + reg_errcode_t err; + int null_cnt = 0; + int str_idx = sctx->last_str_idx; + re_node_set cur_dest; + +#ifdef DEBUG + assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); +#endif + + /* Build sifted state_log[str_idx]. It has the nodes which can epsilon + transit to the last_node and the last_node itself. */ + err = re_node_set_init_1 (&cur_dest, sctx->last_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* Then check each states in the state_log. */ + while (str_idx > 0) + { + /* Update counters. */ + null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; + if (null_cnt > mctx->max_mb_elem_len) + { + memset (sctx->sifted_states, '\0', + sizeof (re_dfastate_t *) * str_idx); + re_node_set_free (&cur_dest); + return REG_NOERROR; + } + re_node_set_empty (&cur_dest); + --str_idx; + + if (mctx->state_log[str_idx]) + { + err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* Add all the nodes which satisfy the following conditions: + - It can epsilon transit to a node in CUR_DEST. + - It is in CUR_SRC. + And update state_log. 
*/ + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + err = REG_NOERROR; + free_return: + re_node_set_free (&cur_dest); + return err; +} + +static reg_errcode_t +internal_function +build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, re_node_set *cur_dest) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; + int i; + + /* Then build the next sifted state. + We build the next sifted state on `cur_dest', and update + `sifted_states[str_idx]' with `cur_dest'. + Note: + `cur_dest' is the sifted state from `state_log[str_idx + 1]'. + `cur_src' points the node_set of the old `state_log[str_idx]' + (with the epsilon nodes pre-filtered out). */ + for (i = 0; i < cur_src->nelem; i++) + { + int prev_node = cur_src->elems[i]; + int naccepted = 0; + int ret; + +#ifdef DEBUG + re_token_type_t type = dfa->nodes[prev_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[prev_node].accept_mb) + naccepted = sift_states_iter_mb (mctx, sctx, prev_node, + str_idx, sctx->last_str_idx); +#endif /* RE_ENABLE_I18N */ + + /* We don't check backreferences here. + See update_cur_sifted_state(). */ + if (!naccepted + && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) + && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], + dfa->nexts[prev_node])) + naccepted = 1; + + if (naccepted == 0) + continue; + + if (sctx->limits.nelem) + { + int to_idx = str_idx + naccepted; + if (check_dst_limits (mctx, &sctx->limits, + dfa->nexts[prev_node], to_idx, + prev_node, str_idx)) + continue; + } + ret = re_node_set_insert (cur_dest, prev_node); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + + return REG_NOERROR; +} + +/* Helper functions. 
*/ + +static reg_errcode_t +internal_function +clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) +{ + int top = mctx->state_log_top; + + if (next_state_log_idx >= mctx->input.bufs_len + || (next_state_log_idx >= mctx->input.valid_len + && mctx->input.valid_len < mctx->input.len)) + { + reg_errcode_t err; + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (top < next_state_log_idx) + { + memset (mctx->state_log + top + 1, '\0', + sizeof (re_dfastate_t *) * (next_state_log_idx - top)); + mctx->state_log_top = next_state_log_idx; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, + re_dfastate_t **src, int num) +{ + int st_idx; + reg_errcode_t err; + for (st_idx = 0; st_idx < num; ++st_idx) + { + if (dst[st_idx] == NULL) + dst[st_idx] = src[st_idx]; + else if (src[st_idx] != NULL) + { + re_node_set merged_set; + err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, + &src[st_idx]->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); + re_node_set_free (&merged_set); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *dest_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + const re_node_set *candidates; + candidates = ((mctx->state_log[str_idx] == NULL) ? NULL + : &mctx->state_log[str_idx]->nodes); + + if (dest_nodes->nelem == 0) + sctx->sifted_states[str_idx] = NULL; + else + { + if (candidates) + { + /* At first, add the nodes which can epsilon transit to a node in + DEST_NODE. 
*/ + err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Then, check the limitations in the current sift_context. */ + if (sctx->limits.nelem) + { + err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, + mctx->bkref_ents, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + + sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (candidates && mctx->state_log[str_idx]->has_backref) + { + err = sift_states_bkref (mctx, sctx, str_idx, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + reg_errcode_t err = REG_NOERROR; + int i; + + re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (!state->inveclosure.alloc) + { + err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + for (i = 0; i < dest_nodes->nelem; i++) + re_node_set_merge (&state->inveclosure, + dfa->inveclosures + dest_nodes->elems[i]); + } + return re_node_set_add_intersect (dest_nodes, candidates, + &state->inveclosure); +} + +static reg_errcode_t +internal_function +sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + int ecl_idx; + reg_errcode_t err; + re_node_set *inv_eclosure = dfa->inveclosures + node; + re_node_set except_nodes; + re_node_set_init_empty (&except_nodes); + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (cur_node == node) + continue; + if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) + { + int edst1 = dfa->edests[cur_node].elems[0]; + int edst2 = 
((dfa->edests[cur_node].nelem > 1) + ? dfa->edests[cur_node].elems[1] : -1); + if ((!re_node_set_contains (inv_eclosure, edst1) + && re_node_set_contains (dest_nodes, edst1)) + || (edst2 > 0 + && !re_node_set_contains (inv_eclosure, edst2) + && re_node_set_contains (dest_nodes, edst2))) + { + err = re_node_set_add_intersect (&except_nodes, candidates, + dfa->inveclosures + cur_node); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&except_nodes); + return err; + } + } + } + } + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (!re_node_set_contains (&except_nodes, cur_node)) + { + int idx = re_node_set_contains (dest_nodes, cur_node) - 1; + re_node_set_remove_at (dest_nodes, idx); + } + } + re_node_set_free (&except_nodes); + return REG_NOERROR; +} + +static int +internal_function +check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, + int dst_node, int dst_idx, int src_node, int src_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int lim_idx, src_pos, dst_pos; + + int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); + int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = mctx->bkref_ents + limits->elems[lim_idx]; + subexp_idx = dfa->nodes[ent->node].opr.idx; + + dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, dst_node, dst_idx, + dst_bkref_idx); + src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, src_node, src_idx, + src_bkref_idx); + + /* In case of: + ( ) + ( ) + ( ) */ + if (src_pos == dst_pos) + continue; /* This is unrelated limitation. 
*/ + else + return 1; + } + return 0; +} + +static int +internal_function +check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, + int subexp_idx, int from_node, int bkref_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *eclosures = dfa->eclosures + from_node; + int node_idx; + + /* Else, we are on the boundary: examine the nodes on the epsilon + closure. */ + for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) + { + int node = eclosures->elems[node_idx]; + switch (dfa->nodes[node].type) + { + case OP_BACK_REF: + if (bkref_idx != -1) + { + struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; + do + { + int dst, cpos; + + if (ent->node != node) + continue; + + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word_t) 1 << subexp_idx))) + continue; + + /* Recurse trying to reach the OP_OPEN_SUBEXP and + OP_CLOSE_SUBEXP cases below. But, if the + destination node is the same node as the source + node, don't recurse because it would cause an + infinite loop: a regex that exhibits this behavior + is ()\1*\1* */ + dst = dfa->edests[node].elems[0]; + if (dst == from_node) + { + if (boundaries & 1) + return -1; + else /* if (boundaries & 2) */ + return 0; + } + + cpos = + check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + dst, bkref_idx); + if (cpos == -1 /* && (boundaries & 1) */) + return -1; + if (cpos == 0 && (boundaries & 2)) + return 0; + + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map + &= ~((bitset_word_t) 1 << subexp_idx); + } + while (ent++->more); + } + break; + + case OP_OPEN_SUBEXP: + if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) + return -1; + break; + + case OP_CLOSE_SUBEXP: + if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) + return 0; + break; + + default: + break; + } + } + + return (boundaries & 2) ? 
1 : 0; +} + +static int +internal_function +check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, + int subexp_idx, int from_node, int str_idx, + int bkref_idx) +{ + struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; + int boundaries; + + /* If we are outside the range of the subexpression, return -1 or 1. */ + if (str_idx < lim->subexp_from) + return -1; + + if (lim->subexp_to < str_idx) + return 1; + + /* If we are within the subexpression, return 0. */ + boundaries = (str_idx == lim->subexp_from); + boundaries |= (str_idx == lim->subexp_to) << 1; + if (boundaries == 0) + return 0; + + /* Else, examine epsilon closure. */ + return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + from_node, bkref_idx); +} + +/* Check the limitations of sub expressions LIMITS, and remove the nodes + which are against limitations from DEST_NODES. */ + +static reg_errcode_t +internal_function +check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates, re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, int str_idx) +{ + reg_errcode_t err; + int node_idx, lim_idx; + + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = bkref_ents + limits->elems[lim_idx]; + + if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) + continue; /* This is unrelated limitation. */ + + subexp_idx = dfa->nodes[ent->node].opr.idx; + if (ent->subexp_to == str_idx) + { + int ops_node = -1; + int cls_node = -1; + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_OPEN_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + ops_node = node; + else if (type == OP_CLOSE_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + cls_node = node; + } + + /* Check the limitation of the open subexpression. 
*/ + /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ + if (ops_node >= 0) + { + err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Check the limitation of the close subexpression. */ + if (cls_node >= 0) + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + if (!re_node_set_contains (dfa->inveclosures + node, + cls_node) + && !re_node_set_contains (dfa->eclosures + node, + cls_node)) + { + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + --node_idx; + } + } + } + else /* (ent->subexp_to != str_idx) */ + { + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) + { + if (subexp_idx != dfa->nodes[node].opr.idx) + continue; + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int node_idx, node; + re_sift_context_t local_sctx; + int first_idx = search_cur_bkref_entry (mctx, str_idx); + + if (first_idx == -1) + return REG_NOERROR; + + local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. 
*/ + + for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) + { + int enabled_idx; + re_token_type_t type; + struct re_backref_cache_entry *entry; + node = candidates->elems[node_idx]; + type = dfa->nodes[node].type; + /* Avoid infinite loop for the REs like "()\1+". */ + if (node == sctx->last_node && str_idx == sctx->last_str_idx) + continue; + if (type != OP_BACK_REF) + continue; + + entry = mctx->bkref_ents + first_idx; + enabled_idx = first_idx; + do + { + int subexp_len; + int to_idx; + int dst_node; + int ret; + re_dfastate_t *cur_state; + + if (entry->node != node) + continue; + subexp_len = entry->subexp_to - entry->subexp_from; + to_idx = str_idx + subexp_len; + dst_node = (subexp_len ? dfa->nexts[node] + : dfa->edests[node].elems[0]); + + if (to_idx > sctx->last_str_idx + || sctx->sifted_states[to_idx] == NULL + || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) + || check_dst_limits (mctx, &sctx->limits, node, + str_idx, dst_node, to_idx)) + continue; + + if (local_sctx.sifted_states == NULL) + { + local_sctx = *sctx; + err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.last_node = node; + local_sctx.last_str_idx = str_idx; + ret = re_node_set_insert (&local_sctx.limits, enabled_idx); + if (BE (ret < 0, 0)) + { + err = REG_ESPACE; + goto free_return; + } + cur_state = local_sctx.sifted_states[str_idx]; + err = sift_states_backward (mctx, &local_sctx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + if (sctx->limited_states != NULL) + { + err = merge_state_array (dfa, sctx->limited_states, + local_sctx.sifted_states, + str_idx + 1); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.sifted_states[str_idx] = cur_state; + re_node_set_remove (&local_sctx.limits, enabled_idx); + + /* mctx->bkref_ents may have changed, reload the pointer. 
*/ + entry = mctx->bkref_ents + enabled_idx; + } + while (enabled_idx++, entry++->more); + } + err = REG_NOERROR; + free_return: + if (local_sctx.sifted_states != NULL) + { + re_node_set_free (&local_sctx.limits); + } + + return err; +} + + +#ifdef RE_ENABLE_I18N +static int +internal_function +sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int naccepted; + /* Check the node can accept `multi byte'. */ + naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); + if (naccepted > 0 && str_idx + naccepted <= max_str_idx && + !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], + dfa->nexts[node_idx])) + /* The node can't accept the `multi byte', or the + destination was already thrown away, then the node + could't accept the current input `multi byte'. */ + naccepted = 0; + /* Otherwise, it is sure that the node could accept + `naccepted' bytes input. */ + return naccepted; +} +#endif /* RE_ENABLE_I18N */ + + +/* Functions for state transition. */ + +/* Return the next state to which the current state STATE will transit by + accepting the current input byte, and update STATE_LOG if necessary. + If STATE can accept a multibyte char/collating element/back reference + update the destination of STATE_LOG. */ + +static re_dfastate_t * +internal_function +transit_state (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + re_dfastate_t **trtable; + unsigned char ch; + +#ifdef RE_ENABLE_I18N + /* If the current state can accept multibyte. */ + if (BE (state->accept_mb, 0)) + { + *err = transit_state_mb (mctx, state); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } +#endif /* RE_ENABLE_I18N */ + + /* Then decide the next state with the single byte. 
*/ +#if 0 + if (0) + /* don't use transition table */ + return transit_state_sb (err, mctx, state); +#endif + + /* Use transition table */ + ch = re_string_fetch_byte (&mctx->input); + for (;;) + { + trtable = state->trtable; + if (BE (trtable != NULL, 1)) + return trtable[ch]; + + trtable = state->word_trtable; + if (BE (trtable != NULL, 1)) + { + unsigned int context; + context + = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return trtable[ch + SBC_MAX]; + else + return trtable[ch]; + } + + if (!build_trtable (mctx->dfa, state)) + { + *err = REG_ESPACE; + return NULL; + } + + /* Retry, we now have a transition table. */ + } +} + +/* Update the state_log if we need */ +re_dfastate_t * +internal_function +merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *next_state) +{ + const re_dfa_t *const dfa = mctx->dfa; + int cur_idx = re_string_cur_idx (&mctx->input); + + if (cur_idx > mctx->state_log_top) + { + mctx->state_log[cur_idx] = next_state; + mctx->state_log_top = cur_idx; + } + else if (mctx->state_log[cur_idx] == 0) + { + mctx->state_log[cur_idx] = next_state; + } + else + { + re_dfastate_t *pstate; + unsigned int context; + re_node_set next_nodes, *log_nodes, *table_nodes = NULL; + /* If (state_log[cur_idx] != 0), it implies that cur_idx is + the destination of a multibyte char/collating element/ + back reference. Then the next state is the union set of + these destinations and the results of the transition table. */ + pstate = mctx->state_log[cur_idx]; + log_nodes = pstate->entrance_nodes; + if (next_state != NULL) + { + table_nodes = next_state->entrance_nodes; + *err = re_node_set_init_union (&next_nodes, table_nodes, + log_nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + else + next_nodes = *log_nodes; + /* Note: We already add the nodes of the initial state, + then we don't need to add them here. 
*/ + + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + next_state = mctx->state_log[cur_idx] + = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + + if (table_nodes != NULL) + re_node_set_free (&next_nodes); + } + + if (BE (dfa->nbackref, 0) && next_state != NULL) + { + /* Check OP_OPEN_SUBEXP in the current state in case that we use them + later. We must check them here, since the back references in the + next state might use them. */ + *err = check_subexp_matching_top (mctx, &next_state->nodes, + cur_idx); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + + /* If the next state has back references. */ + if (next_state->has_backref) + { + *err = transit_state_bkref (mctx, &next_state->nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + next_state = mctx->state_log[cur_idx]; + } + } + + return next_state; +} + +/* Skip bytes in the input that correspond to part of a + multi-byte match, then look in the log for a state + from which to restart matching. */ +re_dfastate_t * +internal_function +find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) +{ + re_dfastate_t *cur_state; + do + { + int max = mctx->state_log_top; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + do + { + if (++cur_str_idx > max) + return NULL; + re_string_skip_bytes (&mctx->input, 1); + } + while (mctx->state_log[cur_str_idx] == NULL); + + cur_state = merge_state_with_log (err, mctx, NULL); + } + while (*err == REG_NOERROR && cur_state == NULL); + return cur_state; +} + +/* Helper functions for transit_state. */ + +/* From the node set CUR_NODES, pick up the nodes whose types are + OP_OPEN_SUBEXP and which have corresponding back references in the regular + expression. And register them to use them later for evaluating the + correspoding back references. 
*/
+
+static reg_errcode_t
+internal_function
+check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
+                           int str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int node_idx;
+  reg_errcode_t err;
+
+  /* TODO: This isn't efficient.
+           Because there might be more than one nodes whose types are
+           OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+           nodes.
+           E.g. RE: (a){2}  */
+  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
+    {
+      int node = cur_nodes->elems[node_idx];
+      /* Only '(' nodes whose index fits in one bitset word and whose bit
+         is set in used_bkref_map are registered as sub-match tops.  */
+      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
+          && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
+          && (dfa->used_bkref_map
+              & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
+        {
+          err = match_ctx_add_subtop (mctx, node, str_idx);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+  return REG_NOERROR;
+}
+
+#if 0
+/* Return the next state to which the current state STATE will transit by
+   accepting the current input byte.  */
+
+static re_dfastate_t *
+transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
+                  re_dfastate_t *state)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  re_node_set next_nodes;
+  re_dfastate_t *next_state;
+  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
+  unsigned int context;
+
+  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+  if (BE (*err != REG_NOERROR, 0))
+    return NULL;
+  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
+    {
+      int cur_node = state->nodes.elems[node_cnt];
+      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
+        {
+          *err = re_node_set_merge (&next_nodes,
+                                    dfa->eclosures + dfa->nexts[cur_node]);
+          if (BE (*err != REG_NOERROR, 0))
+            {
+              re_node_set_free (&next_nodes);
+              return NULL;
+            }
+        }
+    }
+  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
+  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+  /* We don't need to check errors here, since the return value of
+     this function is next_state and ERR is already set.  */
+
+  re_node_set_free (&next_nodes);
+  re_string_skip_bytes (&mctx->input, 1);
+  return next_state;
+}
+#endif
+
+#ifdef RE_ENABLE_I18N
+/* For every node of PSTATE that can accept a multi-byte element at the
+   current input position, merge the node's successors into the entry of
+   MCTX->state_log located just past the accepted bytes.
+   Return REG_NOERROR, or the error code of the first failing step.  */
+
+static reg_errcode_t
+internal_function
+transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int i;
+
+  for (i = 0; i < pstate->nodes.nelem; ++i)
+    {
+      re_node_set dest_nodes, *new_nodes;
+      int cur_node_idx = pstate->nodes.elems[i];
+      int naccepted, dest_idx;
+      unsigned int context;
+      re_dfastate_t *dest_state;
+
+      if (!dfa->nodes[cur_node_idx].accept_mb)
+        continue;
+
+      if (dfa->nodes[cur_node_idx].constraint)
+        {
+          context = re_string_context_at (&mctx->input,
+                                          re_string_cur_idx (&mctx->input),
+                                          mctx->eflags);
+          if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
+                                           context))
+            continue;
+        }
+
+      /* How many bytes the node can accept?  */
+      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
+                                           re_string_cur_idx (&mctx->input));
+      if (naccepted == 0)
+        continue;
+
+      /* The node can accepts `naccepted' bytes.  */
+      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
+      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
+                               : mctx->max_mb_elem_len);
+      err = clean_state_log_if_needed (mctx, dest_idx);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+#ifdef DEBUG
+      assert (dfa->nexts[cur_node_idx] != -1);
+#endif
+      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
+
+      dest_state = mctx->state_log[dest_idx];
+      if (dest_state == NULL)
+        /* Shallow copy: dest_nodes borrows the eclosure storage, which is
+           why it is only freed below when a union was built.  */
+        dest_nodes = *new_nodes;
+      else
+        {
+          err = re_node_set_init_union (&dest_nodes,
+                                        dest_state->entrance_nodes, new_nodes);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+      context = re_string_context_at (&mctx->input, dest_idx - 1,
+                                      mctx->eflags);
+      mctx->state_log[dest_idx]
+        = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+      if (dest_state != NULL)
+        re_node_set_free (&dest_nodes);
+      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+        return err;
+    }
+  return REG_NOERROR;
+}
+#endif /* RE_ENABLE_I18N */
+
+/* For each OP_BACK_REF node in NODES whose constraint holds at the current
+   input position, collect its candidate matches with get_subexp() and add
+   the nodes reachable after the back reference to MCTX->state_log.
+   Recurses when a back reference matched the empty string and thereby
+   enlarged the current state.
+   Return REG_NOERROR, or the error code of the first failing step.  */
+
+static reg_errcode_t
+internal_function
+transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int i;
+  int cur_str_idx = re_string_cur_idx (&mctx->input);
+
+  for (i = 0; i < nodes->nelem; ++i)
+    {
+      int dest_str_idx, prev_nelem, bkc_idx;
+      int node_idx = nodes->elems[i];
+      unsigned int context;
+      const re_token_t *node = dfa->nodes + node_idx;
+      re_node_set *new_dest_nodes;
+
+      /* Check whether `node' is a backreference or not.  */
+      if (node->type != OP_BACK_REF)
+        continue;
+
+      if (node->constraint)
+        {
+          context = re_string_context_at (&mctx->input, cur_str_idx,
+                                          mctx->eflags);
+          if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+            continue;
+        }
+
+      /* `node' is a backreference.
+         Check the substring which the substring matched.  */
+      bkc_idx = mctx->nbkref_ents;
+      err = get_subexp (mctx, node_idx, cur_str_idx);
+      if (BE (err != REG_NOERROR, 0))
+        goto free_return;
+
+      /* And add the epsilon closures (which is `new_dest_nodes') of
+         the backreference to appropriate state_log.  */
+#ifdef DEBUG
+      assert (dfa->nexts[node_idx] != -1);
+#endif
+      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+        {
+          int subexp_len;
+          re_dfastate_t *dest_state;
+          struct re_backref_cache_entry *bkref_ent;
+          bkref_ent = mctx->bkref_ents + bkc_idx;
+          if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+            continue;
+          subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+          new_dest_nodes = (subexp_len == 0
+                            ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+                            : dfa->eclosures + dfa->nexts[node_idx]);
+          dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+                          - bkref_ent->subexp_from);
+          context = re_string_context_at (&mctx->input, dest_str_idx - 1,
+                                          mctx->eflags);
+          dest_state = mctx->state_log[dest_str_idx];
+          prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+                        : mctx->state_log[cur_str_idx]->nodes.nelem);
+          /* Add `new_dest_node' to state_log.  */
+          if (dest_state == NULL)
+            {
+              mctx->state_log[dest_str_idx]
+                = re_acquire_state_context (&err, dfa, new_dest_nodes,
+                                            context);
+              if (BE (mctx->state_log[dest_str_idx] == NULL
+                      && err != REG_NOERROR, 0))
+                goto free_return;
+            }
+          else
+            {
+              re_node_set dest_nodes;
+              err = re_node_set_init_union (&dest_nodes,
+                                            dest_state->entrance_nodes,
+                                            new_dest_nodes);
+              if (BE (err != REG_NOERROR, 0))
+                {
+                  re_node_set_free (&dest_nodes);
+                  goto free_return;
+                }
+              mctx->state_log[dest_str_idx]
+                = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+              re_node_set_free (&dest_nodes);
+              if (BE (mctx->state_log[dest_str_idx] == NULL
+                      && err != REG_NOERROR, 0))
+                goto free_return;
+            }
+          /* We need to check recursively if the backreference can epsilon
+             transit.  */
+          if (subexp_len == 0
+              && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+            {
+              err = check_subexp_matching_top (mctx, new_dest_nodes,
+                                               cur_str_idx);
+              if (BE (err != REG_NOERROR, 0))
+                goto free_return;
+              err = transit_state_bkref (mctx, new_dest_nodes);
+              if (BE (err != REG_NOERROR, 0))
+                goto free_return;
+            }
+        }
+    }
+  err = REG_NOERROR;
+ free_return:
+  return err;
+}
+
+/* Enumerate all the candidates which the backreference BKREF_NODE can match
+   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
+   Note that we might collect inappropriate candidates here.
+   However, the cost of checking them strictly here is too high, then we
+   delay these checking for prune_impossible_nodes().
+   Return REG_NOERROR on success (including "no candidate found"), or the
+   error code of the first step that fails with something other than
+   REG_NOMATCH.  */
+
+static reg_errcode_t
+internal_function
+get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  int subexp_num, sub_top_idx;
+  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
+  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
+  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+  if (cache_idx != -1)
+    {
+      const struct re_backref_cache_entry *entry
+        = mctx->bkref_ents + cache_idx;
+      do
+        if (entry->node == bkref_node)
+          return REG_NOERROR; /* We already checked it.  */
+      while (entry++->more);
+    }
+
+  subexp_num = dfa->nodes[bkref_node].opr.idx;
+
+  /* For each sub expression  */
+  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+    {
+      reg_errcode_t err;
+      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+      re_sub_match_last_t *sub_last;
+      int sub_last_idx, sl_str, bkref_str_off;
+
+      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+        continue; /* It isn't related.  */
+
+      sl_str = sub_top->str_idx;
+      bkref_str_off = bkref_str_idx;
+      /* At first, check the last node of sub expressions we already
+         evaluated.  */
+      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+        {
+          int sl_str_diff;
+          sub_last = sub_top->lasts[sub_last_idx];
+          sl_str_diff = sub_last->str_idx - sl_str;
+          /* The matched string by the sub expression match with the substring
+             at the back reference?  */
+          if (sl_str_diff > 0)
+            {
+              if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
+                {
+                  /* Not enough chars for a successful match.  */
+                  if (bkref_str_off + sl_str_diff > mctx->input.len)
+                    break;
+
+                  err = clean_state_log_if_needed (mctx,
+                                                   bkref_str_off
+                                                   + sl_str_diff);
+                  if (BE (err != REG_NOERROR, 0))
+                    return err;
+                  buf = (const char *) re_string_get_buffer (&mctx->input);
+                }
+              if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
+                /* We don't need to search this sub expression any more.  */
+                break;
+            }
+          bkref_str_off += sl_str_diff;
+          sl_str += sl_str_diff;
+          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+                                bkref_str_idx);
+
+          /* Reload buf, since the preceding call might have reallocated
+             the buffer.  */
+          buf = (const char *) re_string_get_buffer (&mctx->input);
+
+          if (err == REG_NOMATCH)
+            continue;
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+
+      if (sub_last_idx < sub_top->nlasts)
+        continue;
+      if (sub_last_idx > 0)
+        ++sl_str;
+      /* Then, search for the other last nodes of the sub expression.  */
+      for (; sl_str <= bkref_str_idx; ++sl_str)
+        {
+          int cls_node, sl_str_off;
+          const re_node_set *nodes;
+          sl_str_off = sl_str - sub_top->str_idx;
+          /* The matched string by the sub expression match with the substring
+             at the back reference?  */
+          if (sl_str_off > 0)
+            {
+              if (BE (bkref_str_off >= mctx->input.valid_len, 0))
+                {
+                  /* If we are at the end of the input, we cannot match.  */
+                  if (bkref_str_off >= mctx->input.len)
+                    break;
+
+                  err = extend_buffers (mctx);
+                  if (BE (err != REG_NOERROR, 0))
+                    return err;
+
+                  buf = (const char *) re_string_get_buffer (&mctx->input);
+                }
+              if (buf [bkref_str_off++] != buf[sl_str - 1])
+                break; /* We don't need to search this sub expression
+                          any more.  */
+            }
+          if (mctx->state_log[sl_str] == NULL)
+            continue;
+          /* Does this state have a ')' of the sub expression?  */
+          nodes = &mctx->state_log[sl_str]->nodes;
+          cls_node = find_subexp_node (dfa, nodes, subexp_num,
+                                       OP_CLOSE_SUBEXP);
+          if (cls_node == -1)
+            continue; /* No.  */
+          if (sub_top->path == NULL)
+            {
+              sub_top->path = calloc (sizeof (state_array_t),
+                                      sl_str - sub_top->str_idx + 1);
+              if (sub_top->path == NULL)
+                return REG_ESPACE;
+            }
+          /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+             in the current context?  */
+          err = check_arrival (mctx, sub_top->path, sub_top->node,
+                               sub_top->str_idx, cls_node, sl_str,
+                               OP_CLOSE_SUBEXP);
+          if (err == REG_NOMATCH)
+            continue;
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+          sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+          if (BE (sub_last == NULL, 0))
+            return REG_ESPACE;
+          err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+                                bkref_str_idx);
+
+          /* Reload buf for the same reason as in the loop above: the call
+             might have reallocated the input buffer.  */
+          buf = (const char *) re_string_get_buffer (&mctx->input);
+
+          if (err == REG_NOMATCH)
+            continue;
+          /* Any other failure must reach the caller instead of being
+             silently dropped.  */
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+  return REG_NOERROR;
+}
+
+/* Helper functions for get_subexp().  */
+
+/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
+   If it can arrive, register the sub expression expressed with SUB_TOP
+   and SUB_LAST.  */
+
+static reg_errcode_t
+internal_function
+get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
+                re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
+{
+  reg_errcode_t err;
+  int to_idx;
+  /* Can the subexpression arrive the back reference?  */
+  err = check_arrival (mctx, &sub_last->path, sub_last->node,
+                       sub_last->str_idx, bkref_node, bkref_str,
+                       OP_OPEN_SUBEXP);
+  if (err != REG_NOERROR)
+    return err;
+  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
+                             sub_last->str_idx);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
+  return clean_state_log_if_needed (mctx, to_idx);
+}
+
+/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
+   Search '(' if FL_OPEN, or search ')' otherwise.
+   TODO: This function isn't efficient...
+         Because there might be more than one nodes whose types are
+         OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+         nodes.
+         E.g. RE: (a){2}  */
+
+static int
+internal_function
+find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+                  int subexp_idx, int type)
+{
+  int cls_idx;
+  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
+    {
+      int cls_node = nodes->elems[cls_idx];
+      const re_token_t *node = dfa->nodes + cls_node;
+      if (node->type == type
+          && node->opr.idx == subexp_idx)
+        return cls_node;
+    }
+  return -1;
+}
+
+/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
+   heavily reused.
+   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.
+   Any other value is an error code; MCTX->state_log and
+   MCTX->input.cur_idx are restored on every exit path.  */
+
+static reg_errcode_t
+internal_function
+check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
+               int top_str, int last_node, int last_str, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err = REG_NOERROR;
+  int subexp_num, backup_cur_idx, str_idx, null_cnt;
+  re_dfastate_t *cur_state = NULL;
+  re_node_set *cur_nodes, next_nodes;
+  re_dfastate_t **backup_state_log;
+  unsigned int context;
+
+  subexp_num = dfa->nodes[top_node].opr.idx;
+  /* Extend the buffer if we need.  */
+  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
+    {
+      re_dfastate_t **new_array;
+      int old_alloc = path->alloc;
+      path->alloc += last_str + mctx->max_mb_elem_len + 1;
+      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
+      if (BE (new_array == NULL, 0))
+        {
+          path->alloc = old_alloc;
+          return REG_ESPACE;
+        }
+      path->array = new_array;
+      memset (new_array + old_alloc, '\0',
+              sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
+    }
+
+  str_idx = path->next_idx ? 0 : top_str;
+
+  /* Temporary modify MCTX.  From here on every exit must go through the
+     restore code at the end of the function.  */
+  backup_state_log = mctx->state_log;
+  backup_cur_idx = mctx->input.cur_idx;
+  mctx->state_log = path->array;
+  mctx->input.cur_idx = str_idx;
+
+  /* Setup initial node set.  */
+  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+  if (str_idx == top_str)
+    {
+      err = re_node_set_init_1 (&next_nodes, top_node);
+      if (BE (err != REG_NOERROR, 0))
+        goto restore_mctx;
+      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+      if (BE (err != REG_NOERROR, 0))
+        goto free_next_nodes;
+    }
+  else
+    {
+      cur_state = mctx->state_log[str_idx];
+      if (cur_state && cur_state->has_backref)
+        {
+          err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+          if (BE (err != REG_NOERROR, 0))
+            goto restore_mctx;
+        }
+      else
+        re_node_set_init_empty (&next_nodes);
+    }
+  if (str_idx == top_str || (cur_state && cur_state->has_backref))
+    {
+      if (next_nodes.nelem)
+        {
+          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+                                    subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_next_nodes;
+        }
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+        goto free_next_nodes;
+      mctx->state_log[str_idx] = cur_state;
+    }
+
+  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+    {
+      re_node_set_empty (&next_nodes);
+      if (mctx->state_log[str_idx + 1])
+        {
+          err = re_node_set_merge (&next_nodes,
+                                   &mctx->state_log[str_idx + 1]->nodes);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_next_nodes;
+        }
+      if (cur_state)
+        {
+          err = check_arrival_add_next_nodes (mctx, str_idx,
+                                              &cur_state->non_eps_nodes,
+                                              &next_nodes);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_next_nodes;
+        }
+      ++str_idx;
+      if (next_nodes.nelem)
+        {
+          err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_next_nodes;
+          err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+                                    subexp_num, type);
+          if (BE (err != REG_NOERROR, 0))
+            goto free_next_nodes;
+        }
+      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+        goto free_next_nodes;
+      mctx->state_log[str_idx] = cur_state;
+      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+    }
+  re_node_set_free (&next_nodes);
+  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+               : &mctx->state_log[last_str]->nodes);
+  path->next_idx = str_idx;
+
+  /* Fix MCTX.  */
+  mctx->state_log = backup_state_log;
+  mctx->input.cur_idx = backup_cur_idx;
+
+  /* Then check the current node set has the node LAST_NODE.  */
+  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
+    return REG_NOERROR;
+
+  return REG_NOMATCH;
+
+  /* Error exits: release the working set if it was initialized, then undo
+     the temporary modification of MCTX before reporting ERR.  */
+ free_next_nodes:
+  re_node_set_free (&next_nodes);
+ restore_mctx:
+  mctx->state_log = backup_state_log;
+  mctx->input.cur_idx = backup_cur_idx;
+  return err;
+}
+
+/* Helper functions for check_arrival.  */
+
+/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
+   to NEXT_NODES.
+   TODO: This function is similar to the functions transit_state*(),
+         however this function has many additional works.
+         Can't we unify them?  
*/
+
+static reg_errcode_t
+internal_function
+check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
+                              re_node_set *cur_nodes, re_node_set *next_nodes)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t status = REG_NOERROR;
+  re_node_set scratch;
+  int pos;
+
+  /* SCRATCH is reused for every multi-byte destination and released on the
+     single exit path below.  */
+  re_node_set_init_empty (&scratch);
+  for (pos = 0; pos < cur_nodes->nelem; ++pos)
+    {
+      const int node = cur_nodes->elems[pos];
+      int nbytes = 0;
+#ifdef DEBUG
+      assert (!IS_EPSILON_NODE (dfa->nodes[node].type));
+#endif
+#ifdef RE_ENABLE_I18N
+      /* A node that may accept a multi-byte element records its successor
+         directly in the state log entry past the accepted bytes.  */
+      if (dfa->nodes[node].accept_mb)
+        {
+          nbytes = check_node_accept_bytes (dfa, node, &mctx->input, str_idx);
+          if (nbytes > 1)
+            {
+              const int target_idx = str_idx + nbytes;
+              re_dfastate_t *logged = mctx->state_log[target_idx];
+
+              re_node_set_empty (&scratch);
+              if (logged != NULL)
+                {
+                  status = re_node_set_merge (&scratch, &logged->nodes);
+                  if (BE (status != REG_NOERROR, 0))
+                    goto done;
+                }
+              if (BE (re_node_set_insert (&scratch, dfa->nexts[node]) < 0, 0))
+                {
+                  status = REG_ESPACE;
+                  goto done;
+                }
+              mctx->state_log[target_idx] = re_acquire_state (&status, dfa,
+                                                              &scratch);
+              if (BE (mctx->state_log[target_idx] == NULL
+                      && status != REG_NOERROR, 0))
+                goto done;
+            }
+        }
+#endif /* RE_ENABLE_I18N */
+      /* Any accepted byte count, or a plain single-byte accept, puts the
+         successor of NODE into NEXT_NODES.  */
+      if (nbytes
+          || check_node_accept (mctx, dfa->nodes + node, str_idx))
+        {
+          if (BE (re_node_set_insert (next_nodes, dfa->nexts[node]) < 0, 0))
+            {
+              status = REG_ESPACE;
+              goto done;
+            }
+        }
+    }
+  status = REG_NOERROR;
+ done:
+  re_node_set_free (&scratch);
+  return status;
+}
+
+/* For all the nodes in CUR_NODES, add the epsilon closures of them to
+   CUR_NODES, however exclude the nodes which are:
+    - inside the sub expression whose 
number is EX_SUBEXP, if FL_OPEN.
+    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
+*/
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
+                          int ex_subexp, int type)
+{
+  re_node_set expanded;
+  reg_errcode_t status;
+  int pos;
+#ifdef DEBUG
+  assert (cur_nodes->nelem);
+#endif
+  status = re_node_set_alloc (&expanded, cur_nodes->nelem);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  /* Build EXPANDED from the epsilon closure of each node of CUR_NODES.  */
+  for (pos = 0; pos < cur_nodes->nelem; ++pos)
+    {
+      const int node = cur_nodes->elems[pos];
+      const re_node_set *closure = dfa->eclosures + node;
+
+      if (find_subexp_node (dfa, closure, ex_subexp, type) != -1)
+        /* The closure holds a boundary node of EX_SUBEXP: walk it node by
+           node so that the walk can stop at that boundary.  */
+        status = check_arrival_expand_ecl_sub (dfa, &expanded, node,
+                                               ex_subexp, type);
+      else
+        /* Nothing problematic in the closure, take it wholesale.  */
+        status = re_node_set_merge (&expanded, closure);
+
+      if (BE (status != REG_NOERROR, 0))
+        {
+          re_node_set_free (&expanded);
+          return status;
+        }
+    }
+
+  /* Hand the expanded set over to the caller in place of the old one.  */
+  re_node_set_free (cur_nodes);
+  *cur_nodes = expanded;
+  return REG_NOERROR;
+}
+
+/* Helper function for check_arrival_expand_ecl.
+   Check incrementally the epsilon closure of TARGET, and if it isn't
+   problematic append it to DST_NODES.  
*/
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
+                              int target, int ex_subexp, int type)
+{
+  int cur_node;
+  /* Follow the first epsilon destination iteratively and the second one
+     recursively; stop as soon as a node is already in DST_NODES.  */
+  for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
+    {
+      int err;
+
+      if (dfa->nodes[cur_node].type == type
+          && dfa->nodes[cur_node].opr.idx == ex_subexp)
+        {
+          if (type == OP_CLOSE_SUBEXP)
+            {
+              err = re_node_set_insert (dst_nodes, cur_node);
+              if (BE (err == -1, 0))
+                return REG_ESPACE;
+            }
+          break;
+        }
+      err = re_node_set_insert (dst_nodes, cur_node);
+      if (BE (err == -1, 0))
+        return REG_ESPACE;
+      if (dfa->edests[cur_node].nelem == 0)
+        break;
+      if (dfa->edests[cur_node].nelem == 2)
+        {
+          err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
+                                              dfa->edests[cur_node].elems[1],
+                                              ex_subexp, type);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+      cur_node = dfa->edests[cur_node].elems[0];
+    }
+  return REG_NOERROR;
+}
+
+
+/* For all the back references in the current state, calculate the
+   destination of the back references by the appropriate entry
+   in MCTX->BKREF_ENTS.  */
+
+static reg_errcode_t
+internal_function
+expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
+                    int cur_str, int subexp_num, int type)
+{
+  const re_dfa_t *const dfa = mctx->dfa;
+  reg_errcode_t err;
+  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+  struct re_backref_cache_entry *ent;
+
+  if (cache_idx_start == -1)
+    return REG_NOERROR;
+
+ restart:
+  ent = mctx->bkref_ents + cache_idx_start;
+  do
+    {
+      int to_idx, next_node;
+
+      /* Is this entry ENT is appropriate?  */
+      if (!re_node_set_contains (cur_nodes, ent->node))
+        continue; /* No.  */
+
+      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+      /* Calculate the destination of the back reference, and append it
+         to MCTX->STATE_LOG.  */
+      if (to_idx == cur_str)
+        {
+          /* The backreference did epsilon transit, we must re-check all the
+             node in the current state.  */
+          re_node_set new_dests;
+          reg_errcode_t err2, err3;
+          next_node = dfa->edests[ent->node].elems[0];
+          if (re_node_set_contains (cur_nodes, next_node))
+            continue;
+          err = re_node_set_init_1 (&new_dests, next_node);
+          err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
+          err3 = re_node_set_merge (cur_nodes, &new_dests);
+          re_node_set_free (&new_dests);
+          if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+                  || err3 != REG_NOERROR, 0))
+            {
+              err = (err != REG_NOERROR ? err
+                     : (err2 != REG_NOERROR ? err2 : err3));
+              return err;
+            }
+          /* TODO: It is still inefficient...  */
+          goto restart;
+        }
+      else
+        {
+          re_node_set union_set;
+          next_node = dfa->nexts[ent->node];
+          if (mctx->state_log[to_idx])
+            {
+              int ret;
+              if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+                                        next_node))
+                continue;
+              err = re_node_set_init_copy (&union_set,
+                                           &mctx->state_log[to_idx]->nodes);
+              ret = re_node_set_insert (&union_set, next_node);
+              if (BE (err != REG_NOERROR || ret < 0, 0))
+                {
+                  re_node_set_free (&union_set);
+                  err = err != REG_NOERROR ? err : REG_ESPACE;
+                  return err;
+                }
+            }
+          else
+            {
+              err = re_node_set_init_1 (&union_set, next_node);
+              if (BE (err != REG_NOERROR, 0))
+                return err;
+            }
+          mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+          re_node_set_free (&union_set);
+          if (BE (mctx->state_log[to_idx] == NULL
+                  && err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+  while (ent++->more);
+  return REG_NOERROR;
+}
+
+/* Build transition table for the state.
+   On success exactly one of STATE->trtable and STATE->word_trtable is set
+   to a freshly allocated table.
+   Return 1 if succeeded, otherwise return 0.  */
+
+static int
+internal_function
+build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
+{
+  reg_errcode_t err;
+  int i, j, ch, need_word_trtable = 0;
+  bitset_word_t elem, mask;
+  bool dests_node_malloced = false;
+  bool dest_states_malloced = false;
+  int ndests; /* Number of the destination states from `state'.  */
+  re_dfastate_t **trtable;
+  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+  re_node_set follows, *dests_node;
+  bitset_t *dests_ch;
+  bitset_t acceptable;
+
+  struct dests_alloc
+  {
+    re_node_set dests_node[SBC_MAX];
+    bitset_t dests_ch[SBC_MAX];
+  } *dests_alloc;
+
+  /* We build DFA states which corresponds to the destination nodes
+     from `state'.  `dests_node[i]' represents the nodes which i-th
+     destination state contains, and `dests_ch[i]' represents the
+     characters which i-th destination state accepts.  */
+  if (__libc_use_alloca (sizeof (struct dests_alloc)))
+    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
+  else
+    {
+      dests_alloc = re_malloc (struct dests_alloc, 1);
+      if (BE (dests_alloc == NULL, 0))
+        return 0;
+      dests_node_malloced = true;
+    }
+  dests_node = dests_alloc->dests_node;
+  dests_ch = dests_alloc->dests_ch;
+
+  /* Initialize transition table.  */
+  state->word_trtable = state->trtable = NULL;
+
+  /* At first, group all nodes belonging to `state' into several
+     destinations.  */
+  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
+  if (BE (ndests <= 0, 0))
+    {
+      if (dests_node_malloced)
+        free (dests_alloc);
+      /* Return 0 in case of an error, 1 otherwise.  */
+      if (ndests == 0)
+        {
+          state->trtable = (re_dfastate_t **)
+            calloc (sizeof (re_dfastate_t *), SBC_MAX);
+          /* Do not report success without a table: an allocation failure
+             here is an error like any other.  */
+          if (BE (state->trtable == NULL, 0))
+            return 0;
+          return 1;
+        }
+      return 0;
+    }
+
+  err = re_node_set_alloc (&follows, ndests + 1);
+  if (BE (err != REG_NOERROR, 0))
+    goto out_free;
+
+  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+                         + ndests * 3 * sizeof (re_dfastate_t *)))
+    dest_states = (re_dfastate_t **)
+      alloca (ndests * 3 * sizeof (re_dfastate_t *));
+  else
+    {
+      dest_states = (re_dfastate_t **)
+        malloc (ndests * 3 * sizeof (re_dfastate_t *));
+      if (BE (dest_states == NULL, 0))
+        {
+          /* Shared failure exit, also entered by `goto' from outside
+             this block.  */
+out_free:
+          if (dest_states_malloced)
+            free (dest_states);
+          re_node_set_free (&follows);
+          for (i = 0; i < ndests; ++i)
+            re_node_set_free (dests_node + i);
+          if (dests_node_malloced)
+            free (dests_alloc);
+          return 0;
+        }
+      dest_states_malloced = true;
+    }
+  /* DEST_STATES holds three consecutive arrays of NDESTS entries each.  */
+  dest_states_word = dest_states + ndests;
+  dest_states_nl = dest_states_word + ndests;
+  bitset_empty (acceptable);
+
+  /* Then build the states for all destinations.  */
+  for (i = 0; i < ndests; ++i)
+    {
+      int next_node;
+      re_node_set_empty (&follows);
+      /* Merge the follows of this destination states.  */
+      for (j = 0; j < dests_node[i].nelem; ++j)
+        {
+          next_node = dfa->nexts[dests_node[i].elems[j]];
+          if (next_node != -1)
+            {
+              err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+              if (BE (err != REG_NOERROR, 0))
+                goto out_free;
+            }
+        }
+      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+        goto out_free;
+      /* If the new state has context constraint,
+         build appropriate states for these contexts.  */
+      if (dest_states[i]->has_constraint)
+        {
+          dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+                                                          CONTEXT_WORD);
+          if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+            goto out_free;
+
+          if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
+            need_word_trtable = 1;
+
+          dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+                                                        CONTEXT_NEWLINE);
+          if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+            goto out_free;
+        }
+      else
+        {
+          dest_states_word[i] = dest_states[i];
+          dest_states_nl[i] = dest_states[i];
+        }
+      bitset_merge (acceptable, dests_ch[i]);
+    }
+
+  if (!BE (need_word_trtable, 0))
+    {
+      /* We don't care about whether the following character is a word
+         character, or we are in a single-byte character set so we can
+         discern by looking at the character code: allocate a
+         256-entry transition table.  */
+      trtable = state->trtable =
+        (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+      if (BE (trtable == NULL, 0))
+        goto out_free;
+
+      /* For all characters ch...:  */
+      for (i = 0; i < BITSET_WORDS; ++i)
+        for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+             elem;
+             mask <<= 1, elem >>= 1, ++ch)
+          if (BE (elem & 1, 0))
+            {
+              /* There must be exactly one destination which accepts
+                 character ch.  See group_nodes_into_DFAstates.  */
+              for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+                ;
+
+              /* j-th destination accepts the word character ch.  */
+              if (dfa->word_char[i] & mask)
+                trtable[ch] = dest_states_word[j];
+              else
+                trtable[ch] = dest_states[j];
+            }
+    }
+  else
+    {
+      /* We care about whether the following character is a word
+         character, and we are in a multi-byte character set: discern
+         by looking at the character code: build two 256-entry
+         transition tables, one starting at trtable[0] and one
+         starting at trtable[SBC_MAX].  */
+      trtable = state->word_trtable =
+        (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
+      if (BE (trtable == NULL, 0))
+        goto out_free;
+
+      /* For all characters ch...:  */
+      for (i = 0; i < BITSET_WORDS; ++i)
+        for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+             elem;
+             mask <<= 1, elem >>= 1, ++ch)
+          if (BE (elem & 1, 0))
+            {
+              /* There must be exactly one destination which accepts
+                 character ch.  See group_nodes_into_DFAstates.  */
+              for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+                ;
+
+              /* j-th destination accepts the word character ch.  */
+              trtable[ch] = dest_states[j];
+              trtable[ch + SBC_MAX] = dest_states_word[j];
+            }
+    }
+
+  /* new line */
+  if (bitset_contain (acceptable, NEWLINE_CHAR))
+    {
+      /* The current state accepts newline character.  */
+      for (j = 0; j < ndests; ++j)
+        if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
+          {
+            /* j-th destination accepts newline character.  */
+            trtable[NEWLINE_CHAR] = dest_states_nl[j];
+            if (need_word_trtable)
+              trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
+            /* There must be only one destination which accepts
+               newline.  See group_nodes_into_DFAstates.  */
+            break;
+          }
+    }
+
+  if (dest_states_malloced)
+    free (dest_states);
+
+  re_node_set_free (&follows);
+  for (i = 0; i < ndests; ++i)
+    re_node_set_free (dests_node + i);
+
+  if (dests_node_malloced)
+    free (dests_alloc);
+
+  return 1;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+   Then for all destinations, set the nodes belonging to the destination
+   to DESTS_NODE[i] and set the characters accepted by the destination
+   to DEST_CH[i].  This function return the number of destinations,
+   or -1 if a node set operation fails.  */
+
+static int
+internal_function
+group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
+                            re_node_set *dests_node, bitset_t *dests_ch)
+{
+  reg_errcode_t err;
+  int result;
+  int i, j, k;
+  int ndests; /* Number of the destinations from `state'.  */
+  bitset_t accepts; /* Characters a node can accept.  */
+  const re_node_set *cur_nodes = &state->nodes;
+  bitset_empty (accepts);
+  ndests = 0;
+
+  /* For all the nodes belonging to `state',  */
+  for (i = 0; i < cur_nodes->nelem; ++i)
+    {
+      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      /* Enumerate all single byte character this node can accept.  */
+      if (type == CHARACTER)
+        bitset_set (accepts, node->opr.c);
+      else if (type == SIMPLE_BRACKET)
+        {
+          bitset_merge (accepts, node->opr.sbcset);
+        }
+      else if (type == OP_PERIOD)
+        {
+#ifdef RE_ENABLE_I18N
+          if (dfa->mb_cur_max > 1)
+            bitset_merge (accepts, dfa->sb_char);
+          else
+#endif
+            bitset_set_all (accepts);
+          if (!(dfa->syntax & RE_DOT_NEWLINE))
+            bitset_clear (accepts, '\n');
+          if (dfa->syntax & RE_DOT_NOT_NULL)
+            bitset_clear (accepts, '\0');
+        }
+#ifdef RE_ENABLE_I18N
+      else if (type == OP_UTF8_PERIOD)
+        {
+          memset (accepts, '\xff', sizeof (bitset_t) / 2);
+          if (!(dfa->syntax & RE_DOT_NEWLINE))
+            bitset_clear (accepts, '\n');
+          if (dfa->syntax & RE_DOT_NOT_NULL)
+            bitset_clear (accepts, '\0');
+        }
+#endif
+      else
+        continue;
+
+      /* Check the `accepts' and sift out the characters which do not
+         match in the context.  */
+      if (constraint)
+        {
+          if (constraint & NEXT_NEWLINE_CONSTRAINT)
+            {
+              bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+              bitset_empty (accepts);
+              if (accepts_newline)
+                bitset_set (accepts, NEWLINE_CHAR);
+              else
+                continue;
+            }
+          if (constraint & NEXT_ENDBUF_CONSTRAINT)
+            {
+              bitset_empty (accepts);
+              continue;
+            }
+
+          if (constraint & NEXT_WORD_CONSTRAINT)
+            {
+              bitset_word_t any_set = 0;
+              if (type == CHARACTER && !node->word_char)
+                {
+                  bitset_empty (accepts);
+                  continue;
+                }
+#ifdef RE_ENABLE_I18N
+              if (dfa->mb_cur_max > 1)
+                for (j = 0; j < BITSET_WORDS; ++j)
+                  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
+              else
+#endif
+                for (j = 0; j < BITSET_WORDS; ++j)
+                  any_set |= (accepts[j] &= dfa->word_char[j]);
+              if (!any_set)
+                continue;
+            }
+          if (constraint & NEXT_NOTWORD_CONSTRAINT)
+            {
+              bitset_word_t any_set = 0;
+              if (type == CHARACTER && node->word_char)
+                {
+                  bitset_empty (accepts);
+                  continue;
+                }
+#ifdef RE_ENABLE_I18N
+              if (dfa->mb_cur_max > 1)
+                for (j = 0; j < BITSET_WORDS; ++j)
+                  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
+              else
+#endif
+                for (j = 0; j < BITSET_WORDS; ++j)
+                  any_set |= (accepts[j] &= ~dfa->word_char[j]);
+              if (!any_set)
+                continue;
+            }
+        }
+
+      /* Then divide `accepts' into DFA states, or create a new
+         state.  Above, we make sure that accepts is not empty.  */
+      for (j = 0; j < ndests; ++j)
+        {
+          bitset_t intersec; /* Intersection sets, see below.  */
+          bitset_t remains;
+          /* Flags, see below.  */
+          bitset_word_t has_intersec, not_subset, not_consumed;
+
+          /* Optimization, skip if this state doesn't accept the character.  */
+          if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
+            continue;
+
+          /* Enumerate the intersection set of this state and `accepts'.  */
+          has_intersec = 0;
+          for (k = 0; k < BITSET_WORDS; ++k)
+            has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
+          /* And skip if the intersection set is empty.  */
+          if (!has_intersec)
+            continue;
+
+          /* Then check if this state is a subset of `accepts'.  */
+          not_subset = not_consumed = 0;
+          for (k = 0; k < BITSET_WORDS; ++k)
+            {
+              not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
+              not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
+            }
+
+          /* If this state isn't a subset of `accepts', create a
+             new group state, which has the `remains'. */
+          if (not_subset)
+            {
+              bitset_copy (dests_ch[ndests], remains);
+              bitset_copy (dests_ch[j], intersec);
+              err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+              if (BE (err != REG_NOERROR, 0))
+                goto error_return;
+              ++ndests;
+            }
+
+          /* Put the position in the current group. */
+          result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+          if (BE (result < 0, 0))
+            goto error_return;
+
+          /* If all characters are consumed, go to next node. */
+          if (!not_consumed)
+            break;
+        }
+      /* Some characters remain, create a new group. */
+      if (j == ndests)
+        {
+          bitset_copy (dests_ch[ndests], accepts);
+          err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+          if (BE (err != REG_NOERROR, 0))
+            goto error_return;
+          ++ndests;
+          bitset_empty (accepts);
+        }
+    }
+  return ndests;
+ error_return:
+  for (j = 0; j < ndests; ++j)
+    re_node_set_free (dests_node + j);
+  return -1;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
+   Return the number of the bytes the node accepts.
+   STR_IDX is the current index of the input string.
+
+   This function handles the nodes which can accept one character, or
+   one collating element like '.', '[a-z]', opposite to the other nodes
+   can only accept one byte.  
*/ + +static int +internal_function +check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int str_idx) +{ + const re_token_t *node = dfa->nodes + node_idx; + int char_len, elem_len; + int i; + + if (BE (node->type == OP_UTF8_PERIOD, 0)) + { + unsigned char c = re_string_byte_at (input, str_idx), d; + if (BE (c < 0xc2, 1)) + return 0; + + if (str_idx + 2 > input->len) + return 0; + + d = re_string_byte_at (input, str_idx + 1); + if (c < 0xe0) + return (d < 0x80 || d > 0xbf) ? 0 : 2; + else if (c < 0xf0) + { + char_len = 3; + if (c == 0xe0 && d < 0xa0) + return 0; + } + else if (c < 0xf8) + { + char_len = 4; + if (c == 0xf0 && d < 0x90) + return 0; + } + else if (c < 0xfc) + { + char_len = 5; + if (c == 0xf8 && d < 0x88) + return 0; + } + else if (c < 0xfe) + { + char_len = 6; + if (c == 0xfc && d < 0x84) + return 0; + } + else + return 0; + + if (str_idx + char_len > input->len) + return 0; + + for (i = 1; i < char_len; ++i) + { + d = re_string_byte_at (input, str_idx + i); + if (d < 0x80 || d > 0xbf) + return 0; + } + return char_len; + } + + char_len = re_string_char_size_at (input, str_idx); + if (node->type == OP_PERIOD) + { + if (char_len <= 1) + return 0; + /* FIXME: I don't think this if is needed, as both '\n' + and '\0' are char_len == 1. */ + /* '.' accepts any one character except the following two cases. 
*/ + if ((!(dfa->syntax & RE_DOT_NEWLINE) && + re_string_byte_at (input, str_idx) == '\n') || + ((dfa->syntax & RE_DOT_NOT_NULL) && + re_string_byte_at (input, str_idx) == '\0')) + return 0; + return char_len; + } + + elem_len = re_string_elem_size_at (input, str_idx); + if ((elem_len <= 1 && char_len <= 1) || char_len == 0) + return 0; + + if (node->type == COMPLEX_BRACKET) + { + const re_charset_t *cset = node->opr.mbcset; +# ifdef _LIBC + const unsigned char *pin + = ((const unsigned char *) re_string_get_buffer (input) + str_idx); + int j; + uint32_t nrules; +# endif /* _LIBC */ + int match_len = 0; + wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) + ? re_string_wchar_at (input, str_idx) : 0); + + /* match with multibyte character? */ + for (i = 0; i < cset->nmbchars; ++i) + if (wc == cset->mbchars[i]) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + /* match with character_class? */ + for (i = 0; i < cset->nchar_classes; ++i) + { + wctype_t wt = cset->char_classes[i]; + if (__iswctype (wc, wt)) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + +# ifdef _LIBC + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + unsigned int in_collseq = 0; + const int32_t *table, *indirect; + const unsigned char *weights, *extra; + const char *collseqwc; + /* This #include defines a local function! */ +# include + + /* match with collating_symbol? */ + if (cset->ncoll_syms) + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + for (i = 0; i < cset->ncoll_syms; ++i) + { + const unsigned char *coll_sym = extra + cset->coll_syms[i]; + /* Compare the length of input collating element and + the length of current collating element. */ + if (*coll_sym != elem_len) + continue; + /* Compare each bytes. */ + for (j = 0; j < *coll_sym; j++) + if (pin[j] != coll_sym[1 + j]) + break; + if (j == *coll_sym) + { + /* Match if every bytes is equal. 
*/ + match_len = j; + goto check_node_accept_bytes_match; + } + } + + if (cset->nranges) + { + if (elem_len <= char_len) + { + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + in_collseq = __collseq_table_lookup (collseqwc, wc); + } + else + in_collseq = find_collation_sequence_value (pin, elem_len); + } + /* match with range expression? */ + for (i = 0; i < cset->nranges; ++i) + if (cset->range_starts[i] <= in_collseq + && in_collseq <= cset->range_ends[i]) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + + /* match with equivalence_class? */ + if (cset->nequiv_classes) + { + const unsigned char *cp = pin; + table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); + int32_t idx = findidx (&cp); + if (idx > 0) + for (i = 0; i < cset->nequiv_classes; ++i) + { + int32_t equiv_class_idx = cset->equiv_classes[i]; + size_t weight_len = weights[idx & 0xffffff]; + if (weight_len == weights[equiv_class_idx & 0xffffff] + && (idx >> 24) == (equiv_class_idx >> 24)) + { + int cnt = 0; + + idx &= 0xffffff; + equiv_class_idx &= 0xffffff; + + while (cnt <= weight_len + && (weights[equiv_class_idx + 1 + cnt] + == weights[idx + 1 + cnt])) + ++cnt; + if (cnt > weight_len) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + } + } + } + } + else +# endif /* _LIBC */ + { + /* match with range expression? 
*/ +#if __GNUC__ >= 2 + wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; +#else + wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + cmp_buf[2] = wc; +#endif + for (i = 0; i < cset->nranges; ++i) + { + cmp_buf[0] = cset->range_starts[i]; + cmp_buf[4] = cset->range_ends[i]; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + } + check_node_accept_bytes_match: + if (!cset->non_match) + return match_len; + else + { + if (match_len > 0) + return 0; + else + return (elem_len > char_len) ? elem_len : char_len; + } + } + return 0; +} + +# ifdef _LIBC +static unsigned int +internal_function +find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) +{ + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules == 0) + { + if (mbs_len == 1) + { + /* No valid character. Match it as a single byte character. */ + const unsigned char *collseq = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + return collseq[mbs[0]]; + } + return UINT_MAX; + } + else + { + int32_t idx; + const unsigned char *extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + int32_t extrasize = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; + + for (idx = 0; idx < extrasize;) + { + int mbs_cnt, found = 0; + int32_t elem_mbs_len; + /* Skip the name of collating element name. */ + idx = idx + extra[idx] + 1; + elem_mbs_len = extra[idx++]; + if (mbs_len == elem_mbs_len) + { + for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) + if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) + break; + if (mbs_cnt == elem_mbs_len) + /* Found the entry. */ + found = 1; + } + /* Skip the byte sequence of the collating element. */ + idx += elem_mbs_len; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the collation sequence value. 
*/ + idx += sizeof (uint32_t); + /* Skip the wide char sequence of the collating element. */ + idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + /* If we found the entry, return the sequence value. */ + if (found) + return *(uint32_t *) (extra + idx); + /* Skip the collation sequence value. */ + idx += sizeof (uint32_t); + } + return UINT_MAX; + } +} +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ + +/* Check whether the node accepts the byte which is IDX-th + byte of the INPUT. */ + +static int +internal_function +check_node_accept (const re_match_context_t *mctx, const re_token_t *node, + int idx) +{ + unsigned char ch; + ch = re_string_byte_at (&mctx->input, idx); + switch (node->type) + { + case CHARACTER: + if (node->opr.c != ch) + return 0; + break; + + case SIMPLE_BRACKET: + if (!bitset_contain (node->opr.sbcset, ch)) + return 0; + break; + +#ifdef RE_ENABLE_I18N + case OP_UTF8_PERIOD: + if (ch >= 0x80) + return 0; + /* FALLTHROUGH */ +#endif + case OP_PERIOD: + if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) + || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) + return 0; + break; + + default: + return 0; + } + + if (node->constraint) + { + /* The node has constraints. Check whether the current context + satisfies the constraints. */ + unsigned int context = re_string_context_at (&mctx->input, idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + return 0; + } + + return 1; +} + +/* Extend the buffers, if the buffers have run out. */ + +static reg_errcode_t +internal_function +extend_buffers (re_match_context_t *mctx) +{ + reg_errcode_t ret; + re_string_t *pstr = &mctx->input; + + /* Double the lengthes of the buffers. */ + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + if (mctx->state_log != NULL) + { + /* And double the length of state_log. */ + /* XXX We have no indication of the size of this buffer. 
If this + allocation fail we have no indication that the state_log array + does not have the right size. */ + re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, + pstr->bufs_len + 1); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->state_log = new_array; + } + + /* Then reconstruct the buffers. */ + if (pstr->icase) + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + } + return REG_NOERROR; +} + + +/* Functions for matching context. */ + +/* Initialize MCTX. */ + +static reg_errcode_t +internal_function +match_ctx_init (re_match_context_t *mctx, int eflags, int n) +{ + mctx->eflags = eflags; + mctx->match_last = -1; + if (n > 0) + { + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); + mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); + if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) + return REG_ESPACE; + } + /* Already zero-ed by the caller. + else + mctx->bkref_ents = NULL; + mctx->nbkref_ents = 0; + mctx->nsub_tops = 0; */ + mctx->abkref_ents = n; + mctx->max_mb_elem_len = 1; + mctx->asub_tops = n; + return REG_NOERROR; +} + +/* Clean the entries which depend on the current input in MCTX. + This function must be invoked when the matcher changes the start index + of the input, or changes the input string. 
*/ + +static void +internal_function +match_ctx_clean (re_match_context_t *mctx) +{ + int st_idx; + for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) + { + int sl_idx; + re_sub_match_top_t *top = mctx->sub_tops[st_idx]; + for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) + { + re_sub_match_last_t *last = top->lasts[sl_idx]; + re_free (last->path.array); + re_free (last); + } + re_free (top->lasts); + if (top->path) + { + re_free (top->path->array); + re_free (top->path); + } + free (top); + } + + mctx->nsub_tops = 0; + mctx->nbkref_ents = 0; +} + +/* Free all the memory associated with MCTX. */ + +static void +internal_function +match_ctx_free (re_match_context_t *mctx) +{ + /* First, free all the memory associated with MCTX->SUB_TOPS. */ + match_ctx_clean (mctx); + re_free (mctx->sub_tops); + re_free (mctx->bkref_ents); +} + +/* Add a new backreference entry to MCTX. + Note that we assume that caller never call this function with duplicate + entry, and call with STR_IDX which isn't smaller than any existing entry. 
+*/ + +static reg_errcode_t +internal_function +match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, + int to) +{ + if (mctx->nbkref_ents >= mctx->abkref_ents) + { + struct re_backref_cache_entry* new_entry; + new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, + mctx->abkref_ents * 2); + if (BE (new_entry == NULL, 0)) + { + re_free (mctx->bkref_ents); + return REG_ESPACE; + } + mctx->bkref_ents = new_entry; + memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', + sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); + mctx->abkref_ents *= 2; + } + if (mctx->nbkref_ents > 0 + && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) + mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; + + mctx->bkref_ents[mctx->nbkref_ents].node = node; + mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; + mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; + mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; + + /* This is a cache that saves negative results of check_dst_limits_calc_pos. + If bit N is clear, means that this entry won't epsilon-transition to + an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If + it is set, check_dst_limits_calc_pos_1 will recurse and try to find one + such node. + + A backreference does not epsilon-transition unless it is empty, so set + to all zeros if FROM != TO. */ + mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map + = (from == to ? ~0 : 0); + + mctx->bkref_ents[mctx->nbkref_ents++].more = 0; + if (mctx->max_mb_elem_len < to - from) + mctx->max_mb_elem_len = to - from; + return REG_NOERROR; +} + +/* Search for the first entry which has the same str_idx, or -1 if none is + found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. 
*/ + +static int +internal_function +search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) +{ + int left, right, mid, last; + last = right = mctx->nbkref_ents; + for (left = 0; left < right;) + { + mid = (left + right) / 2; + if (mctx->bkref_ents[mid].str_idx < str_idx) + left = mid + 1; + else + right = mid; + } + if (left < last && mctx->bkref_ents[left].str_idx == str_idx) + return left; + else + return -1; +} + +/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches + at STR_IDX. */ + +static reg_errcode_t +internal_function +match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) +{ +#ifdef DEBUG + assert (mctx->sub_tops != NULL); + assert (mctx->asub_tops > 0); +#endif + if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) + { + int new_asub_tops = mctx->asub_tops * 2; + re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, + re_sub_match_top_t *, + new_asub_tops); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops = new_array; + mctx->asub_tops = new_asub_tops; + } + mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); + if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops[mctx->nsub_tops]->node = node; + mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; + return REG_NOERROR; +} + +/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches + at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. 
*/ + +static re_sub_match_last_t * +internal_function +match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) +{ + re_sub_match_last_t *new_entry; + if (BE (subtop->nlasts == subtop->alasts, 0)) + { + int new_alasts = 2 * subtop->alasts + 1; + re_sub_match_last_t **new_array = re_realloc (subtop->lasts, + re_sub_match_last_t *, + new_alasts); + if (BE (new_array == NULL, 0)) + return NULL; + subtop->lasts = new_array; + subtop->alasts = new_alasts; + } + new_entry = calloc (1, sizeof (re_sub_match_last_t)); + if (BE (new_entry != NULL, 1)) + { + subtop->lasts[subtop->nlasts] = new_entry; + new_entry->node = node; + new_entry->str_idx = str_idx; + ++subtop->nlasts; + } + return new_entry; +} + +static void +internal_function +sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, int last_str_idx) +{ + sctx->sifted_states = sifted_sts; + sctx->limited_states = limited_sts; + sctx->last_node = last_node; + sctx->last_str_idx = last_str_idx; + re_node_set_init_empty (&sctx->limits); +} diff --git a/html.c b/html.c new file mode 100644 index 0000000..6801d37 --- /dev/null +++ b/html.c @@ -0,0 +1,49 @@ +/* +* $Id: html.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for HTML language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ +#include "parse.h" + +/* +* FUNCTION DEFINITIONS +*/ + +static void installHtmlRegex (const langType language) +{ +#define POSSIBLE_ATTRIBUTES "([ \t]+[a-z]+=\"?[^>\"]*\"?)*" + addTagRegex (language, + "\"]+)\"?" 
+ POSSIBLE_ATTRIBUTES + "[ \t]*>", + "\\2", "a,anchor,named anchors", "i"); + + addTagRegex (language, "^[ \t]*function[ \t]*([A-Za-z0-9_]+)[ \t]*\\(", + "\\1", "f,function,JavaScript functions", NULL); +} + +/* Create parser definition stucture */ +extern parserDefinition* HtmlParser (void) +{ + static const char *const extensions [] = { "htm", "html", NULL }; + parserDefinition *const def = parserNew ("HTML"); + def->extensions = extensions; + def->initialize = installHtmlRegex; + def->regex = TRUE; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/jscript.c b/jscript.c new file mode 100644 index 0000000..c4e5b1a --- /dev/null +++ b/jscript.c @@ -0,0 +1,1572 @@ +/* + * $Id: jscript.c 666 2008-05-15 17:47:31Z dfishburn $ + * + * Copyright (c) 2003, Darren Hiebert + * + * This source code is released for free distribution under the terms of the + * GNU General Public License. + * + * This module contains functions for generating tags for JavaScript language + * files. + * + * This is a good reference for different forms of the function statement: + * http://www.permadi.com/tutorial/jsFunc/ + * Another good reference: + * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide + */ + +/* + * INCLUDE FILES + */ +#include "general.h" /* must always come first */ +#include /* to define isalpha () */ +#include +#ifdef DEBUG +#include +#endif + +#include "debug.h" +#include "entry.h" +#include "keyword.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* + * MACROS + */ +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* + * DATA DECLARATIONS + */ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +/* + * Tracks class and function names already created + */ +static stringList *ClassNames; +static stringList *FunctionNames; + +/* Used to specify type of keyword. 
+*/ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_function, + KEYWORD_capital_function, + KEYWORD_object, + KEYWORD_capital_object, + KEYWORD_prototype, + KEYWORD_var, + KEYWORD_new, + KEYWORD_this, + KEYWORD_for, + KEYWORD_while, + KEYWORD_do, + KEYWORD_if, + KEYWORD_else, + KEYWORD_switch, + KEYWORD_try, + KEYWORD_catch, + KEYWORD_finally +} keywordId; + +/* Used to determine whether keyword is valid for the token language and + * what its ID is. + */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_CHARACTER, + TOKEN_CLOSE_PAREN, + TOKEN_SEMICOLON, + TOKEN_COLON, + TOKEN_COMMA, + TOKEN_KEYWORD, + TOKEN_OPEN_PAREN, + TOKEN_OPERATOR, + TOKEN_IDENTIFIER, + TOKEN_STRING, + TOKEN_PERIOD, + TOKEN_OPEN_CURLY, + TOKEN_CLOSE_CURLY, + TOKEN_EQUAL_SIGN, + TOKEN_FORWARD_SLASH, + TOKEN_OPEN_SQUARE, + TOKEN_CLOSE_SQUARE +} tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString * string; + vString * scope; + unsigned long lineNumber; + fpos_t filePosition; + int nestLevel; + boolean ignoreTag; +} tokenInfo; + +/* + * DATA DEFINITIONS + */ + +static langType Lang_js; + +static jmp_buf Exception; + +typedef enum { + JSTAG_FUNCTION, + JSTAG_CLASS, + JSTAG_METHOD, + JSTAG_PROPERTY, + JSTAG_VARIABLE, + JSTAG_COUNT +} jsKind; + +static kindOption JsKinds [] = { + { TRUE, 'f', "function", "functions" }, + { TRUE, 'c', "class", "classes" }, + { TRUE, 'm', "method", "methods" }, + { TRUE, 'p', "property", "properties" }, + { TRUE, 'v', "variable", "global variables" } +}; + +static const keywordDesc JsKeywordTable [] = { + /* keyword keyword ID */ + { "function", KEYWORD_function }, + { "Function", KEYWORD_capital_function }, + { "object", KEYWORD_object }, + { "Object", KEYWORD_capital_object }, + { "prototype", KEYWORD_prototype }, + { "var", KEYWORD_var }, + { "new", KEYWORD_new }, + { "this", KEYWORD_this }, + { "for", KEYWORD_for }, + { "while", 
KEYWORD_while }, + { "do", KEYWORD_do }, + { "if", KEYWORD_if }, + { "else", KEYWORD_else }, + { "switch", KEYWORD_switch }, + { "try", KEYWORD_try }, + { "catch", KEYWORD_catch }, + { "finally", KEYWORD_finally } +}; + +/* + * FUNCTION DEFINITIONS + */ + +/* Recursive functions */ +static void parseFunction (tokenInfo *const token); +static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent); +static boolean parseLine (tokenInfo *const token, boolean is_inside_class); + +static boolean isIdentChar (const int c) +{ + return (boolean) + (isalpha (c) || isdigit (c) || c == '$' || + c == '@' || c == '_' || c == '#'); +} + +static void buildJsKeywordHash (void) +{ + const size_t count = sizeof (JsKeywordTable) / + sizeof (JsKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &JsKeywordTable [i]; + addKeyword (p->name, Lang_js, (int) p->id); + } +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->scope = vStringNew (); + token->nestLevel = 0; + token->ignoreTag = FALSE; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + vStringDelete (token->scope); + eFree (token); +} + +/* + * Tag generation functions + */ + +static void makeConstTag (tokenInfo *const token, const jsKind kind) +{ + if (JsKinds [kind].enabled && ! 
token->ignoreTag ) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + initTagEntry (&e, name); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.kindName = JsKinds [kind].name; + e.kind = JsKinds [kind].letter; + + makeTagEntry (&e); + } +} + +static void makeJsTag (tokenInfo *const token, const jsKind kind) +{ + vString * fulltag; + + if (JsKinds [kind].enabled && ! token->ignoreTag ) + { + /* + * If a scope has been added to the token, change the token + * string to include the scope when making the tag. + */ + if ( vStringLength(token->scope) > 0 ) + { + fulltag = vStringNew (); + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + vStringTerminate(fulltag); + vStringCopy(token->string, fulltag); + vStringDelete (fulltag); + } + makeConstTag (token, kind); + } +} + +static void makeClassTag (tokenInfo *const token) +{ + vString * fulltag; + + if ( ! token->ignoreTag ) + { + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! stringListHas(ClassNames, vStringValue (fulltag)) ) + { + stringListAdd (ClassNames, vStringNewCopy (fulltag)); + makeJsTag (token, JSTAG_CLASS); + } + vStringDelete (fulltag); + } +} + +static void makeFunctionTag (tokenInfo *const token) +{ + vString * fulltag; + + if ( ! token->ignoreTag ) + { + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! 
stringListHas(FunctionNames, vStringValue (fulltag)) ) + { + stringListAdd (FunctionNames, vStringNewCopy (fulltag)); + makeJsTag (token, JSTAG_FUNCTION); + } + vStringDelete (fulltag); + } +} + +/* + * Parsing functions + */ + +static void parseString (vString *const string, const int delimiter) +{ + boolean end = FALSE; + while (! end) + { + int c = fileGetc (); + if (c == EOF) + end = TRUE; + else if (c == '\\') + { + c = fileGetc(); /* This maybe a ' or ". */ + vStringPut(string, c); + } + else if (c == delimiter) + end = TRUE; + else + vStringPut (string, c); + } + vStringTerminate (string); +} + +/* Read a C identifier beginning with "firstChar" and places it into + * "name". + */ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + Assert (isIdentChar (c)); + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isIdentChar (c)); + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + do + { + c = fileGetc (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + } + while (c == '\t' || c == ' ' || c == '\n'); + + switch (c) + { + case EOF: longjmp (Exception, (int)ExceptionEOF); break; + case '(': token->type = TOKEN_OPEN_PAREN; break; + case ')': token->type = TOKEN_CLOSE_PAREN; break; + case ';': token->type = TOKEN_SEMICOLON; break; + case ',': token->type = TOKEN_COMMA; break; + case '.': token->type = TOKEN_PERIOD; break; + case ':': token->type = TOKEN_COLON; break; + case '{': token->type = TOKEN_OPEN_CURLY; break; + case '}': token->type = TOKEN_CLOSE_CURLY; break; + case '=': token->type = TOKEN_EQUAL_SIGN; break; + case '[': token->type = TOKEN_OPEN_SQUARE; break; + case ']': token->type = TOKEN_CLOSE_SQUARE; break; + + 
case '\'': + case '"': + token->type = TOKEN_STRING; + parseString (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '\\': + c = fileGetc (); + if (c != '\\' && c != '"' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_CHARACTER; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '/': + { + int d = fileGetc (); + if ( (d != '*') && /* is this the start of a comment? */ + (d != '/') ) /* is a one line comment? */ + { + token->type = TOKEN_FORWARD_SLASH; + fileUngetc (d); + } + else + { + if (d == '*') + { + do + { + fileSkipToCharacter ('*'); + c = fileGetc (); + if (c == '/') + break; + else + fileUngetc (c); + } while (c != EOF && c != '\0'); + goto getNextChar; + } + else if (d == '/') /* is this the start of a comment? */ + { + fileSkipToCharacter ('\n'); + goto getNextChar; + } + } + break; + } + + default: + if (! isIdentChar (c)) + token->type = TOKEN_UNDEFINED; + else + { + parseIdentifier (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + token->keyword = analyzeToken (token->string, Lang_js); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + } +} + +static void copyToken (tokenInfo *const dest, tokenInfo *const src) +{ + dest->nestLevel = src->nestLevel; + dest->lineNumber = src->lineNumber; + dest->filePosition = src->filePosition; + dest->type = src->type; + dest->keyword = src->keyword; + vStringCopy(dest->string, src->string); + vStringCopy(dest->scope, src->scope); +} + +/* + * Token parsing functions + */ + +static void skipArgumentList (tokenInfo *const token) +{ + int nest_level = 0; + + /* + * Other databases can have arguments with fully declared + * datatypes: + * ( name varchar(30), text binary(10) ) + * So we must check for nested open and closing 
parantheses + */ + + if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */ + { + nest_level++; + while (! (isType (token, TOKEN_CLOSE_PAREN) && (nest_level == 0))) + { + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + { + nest_level++; + } + if (isType (token, TOKEN_CLOSE_PAREN)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void skipArrayList (tokenInfo *const token) +{ + int nest_level = 0; + + /* + * Handle square brackets + * var name[1] + * So we must check for nested open and closing square brackets + */ + + if (isType (token, TOKEN_OPEN_SQUARE)) /* arguments? */ + { + nest_level++; + while (! (isType (token, TOKEN_CLOSE_SQUARE) && (nest_level == 0))) + { + readToken (token); + if (isType (token, TOKEN_OPEN_SQUARE)) + { + nest_level++; + } + if (isType (token, TOKEN_CLOSE_SQUARE)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void addContext (tokenInfo* const parent, const tokenInfo* const child) +{ + if (vStringLength (parent->string) > 0) + { + vStringCatS (parent->string, "."); + } + vStringCatS (parent->string, vStringValue(child->string)); + vStringTerminate(parent->string); +} + +static void addToScope (tokenInfo* const token, vString* const extra) +{ + if (vStringLength (token->scope) > 0) + { + vStringCatS (token->scope, "."); + } + vStringCatS (token->scope, vStringValue(extra)); + vStringTerminate(token->scope); +} + +/* + * Scanning functions + */ + +static void findCmdTerm (tokenInfo *const token) +{ + /* + * Read until we find either a semicolon or closing brace. + * Any nested braces will be handled within. + */ + while (! 
( isType (token, TOKEN_SEMICOLON) || + isType (token, TOKEN_CLOSE_CURLY) ) ) + { + /* Handle nested blocks */ + if ( isType (token, TOKEN_OPEN_CURLY)) + { + parseBlock (token, token); + } + else if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + } + else + { + readToken (token); + } + } +} + +static void parseSwitch (tokenInfo *const token) +{ + /* + * switch (expression){ + * case value1: + * statement; + * break; + * case value2: + * statement; + * break; + * default : statement; + * } + */ + + readToken (token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + +} + +static void parseLoop (tokenInfo *const token) +{ + /* + * Handles these statements + * for (x=0; x<3; x++) + * document.write("This text is repeated three times
"); + * + * for (x=0; x<3; x++) + * { + * document.write("This text is repeated three times
"); + * } + * + * while (number<5){ + * document.write(number+"
"); + * number++; + * } + * + * do{ + * document.write(number+"
"); + * number++; + * } + * while (number<5); + */ + + if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + parseLine(token, FALSE); + } + } + else if (isKeyword (token, KEYWORD_do)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + parseLine(token, FALSE); + } + + readToken(token); + + if (isKeyword (token, KEYWORD_while)) + { + readToken(token); + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + } + } +} + +static boolean parseIf (tokenInfo *const token) +{ + boolean read_next_token = TRUE; + /* + * If statements have two forms + * if ( ... ) + * one line; + * + * if ( ... ) + * statement; + * else + * statement + * + * if ( ... ) { + * multiple; + * statements; + * } + * + * + * if ( ... ) { + * return elem + * } + * + * This example if correctly written, but the + * else contains only 1 statement without a terminator + * since the function finishes with the closing brace. + * + * function a(flag){ + * if(flag) + * test(1); + * else + * test(2) + * } + * + * TODO: Deal with statements that can optional end + * without a semi-colon. Currently this messes up + * the parsing of blocks. 
+ * Need to somehow detect this has happened, and either + * backup a token, or skip reading the next token if + * that is possible from all code locations. + * + */ + + readToken (token); + + if (isKeyword (token, KEYWORD_if)) + { + /* + * Check for an "else if" and consume the "if" + */ + readToken (token); + } + + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions, these will only + * be considered methods. + */ + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + parseBlock (token, token); + } + else + { + findCmdTerm (token); + + /* + * The IF could be followed by an ELSE statement. + * This too could have two formats, a curly braced + * multiline section, or another single line. + */ + + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This statement did not have a line terminator. + */ + read_next_token = FALSE; + } + else + { + readToken (token); + + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * This statement did not have a line terminator. 
+ */ + read_next_token = FALSE; + } + else + { + if (isKeyword (token, KEYWORD_else)) + read_next_token = parseIf (token); + } + } + } + return read_next_token; +} + +static void parseFunction (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + boolean is_class = FALSE; + + /* + * This deals with these formats + * function validFunctionTwo(a,b) {} + */ + + readToken (name); + /* Add scope in case this is an INNER function */ + addToScope(name, token->scope); + + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + do + { + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + addContext (name, token); + readToken (token); + } + } while (isType (token, TOKEN_PERIOD)); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if ( isType (token, TOKEN_OPEN_CURLY) ) + { + is_class = parseBlock (token, name); + if ( is_class ) + makeClassTag (name); + else + makeFunctionTag (name); + } + + findCmdTerm (token); + + deleteToken (name); +} + +static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent) +{ + boolean is_class = FALSE; + boolean read_next_token = TRUE; + vString * saveScope = vStringNew (); + + token->nestLevel++; + /* + * Make this routine a bit more forgiving. + * If called on an open_curly advance it + */ + if ( isType (token, TOKEN_OPEN_CURLY) && + isKeyword(token, KEYWORD_NONE) ) + readToken(token); + + if (! 
isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * Read until we find the closing brace, + * any nested braces will be handled within + */ + do + { + read_next_token = TRUE; + if (isKeyword (token, KEYWORD_this)) + { + /* + * Means we are inside a class and have found + * a class, not a function + */ + is_class = TRUE; + vStringCopy(saveScope, token->scope); + addToScope (token, parent->string); + + /* + * Ignore the remainder of the line + * findCmdTerm(token); + */ + parseLine (token, is_class); + + vStringCopy(token->scope, saveScope); + } + else if (isKeyword (token, KEYWORD_var)) + { + /* + * Potentially we have found an inner function. + * Set something to indicate the scope + */ + vStringCopy(saveScope, token->scope); + addToScope (token, parent->string); + parseLine (token, is_class); + vStringCopy(token->scope, saveScope); + } + else if (isKeyword (token, KEYWORD_function)) + { + vStringCopy(saveScope, token->scope); + addToScope (token, parent->string); + parseFunction (token); + vStringCopy(token->scope, saveScope); + } + else if (isType (token, TOKEN_OPEN_CURLY)) + { + /* Handle nested blocks */ + parseBlock (token, parent); + } + else + { + /* + * It is possible for a line to have no terminator + * if the following line is a closing brace. + * parseLine will detect this case and indicate + * whether we should read an additional token. + */ + read_next_token = parseLine (token, is_class); + } + + /* + * Always read a new token unless we find a statement without + * a ending terminator + */ + if( read_next_token ) + readToken(token); + + /* + * If we find a statement without a terminator consider the + * block finished, otherwise the stack will be off by one. + */ + } while (! 
isType (token, TOKEN_CLOSE_CURLY) && read_next_token ); + } + + vStringDelete(saveScope); + token->nestLevel--; + + return is_class; +} + +static void parseMethods (tokenInfo *const token, tokenInfo *const class) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * validProperty : 2, + * validMethod : function(a,b) {} + * 'validMethod2' : function(a,b) {} + * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false} + */ + + do + { + readToken (token); + if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE)) + { + copyToken(name, token); + + readToken (token); + if ( isType (token, TOKEN_COLON) ) + { + readToken (token); + if ( isKeyword (token, KEYWORD_function) ) + { + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + } + + if (isType (token, TOKEN_OPEN_CURLY)) + { + addToScope (name, class->string); + makeJsTag (name, JSTAG_METHOD); + parseBlock (token, name); + + /* + * Read to the closing curly, check next + * token, if a comma, we must loop again + */ + readToken (token); + } + } + else + { + addToScope (name, class->string); + makeJsTag (name, JSTAG_PROPERTY); + + /* + * Read the next token, if a comma + * we must loop again + */ + readToken (token); + } + } + } + } while ( isType(token, TOKEN_COMMA) ); + + findCmdTerm (token); + + deleteToken (name); +} + +static boolean parseStatement (tokenInfo *const token, boolean is_inside_class) +{ + tokenInfo *const name = newToken (); + tokenInfo *const secondary_name = newToken (); + vString * saveScope = vStringNew (); + boolean is_class = FALSE; + boolean is_terminated = TRUE; + boolean is_global = FALSE; + boolean is_prototype = FALSE; + vString * fulltag; + + vStringClear(saveScope); + /* + * Functions can be named or unnamed. 
+ * This deals with these formats: + * Function + * validFunctionOne = function(a,b) {} + * testlib.validFunctionFive = function(a,b) {} + * var innerThree = function(a,b) {} + * var innerFour = (a,b) {} + * var D2 = secondary_fcn_name(a,b) {} + * var D3 = new Function("a", "b", "return a+b;"); + * Class + * testlib.extras.ValidClassOne = function(a,b) { + * this.a = a; + * } + * Class Methods + * testlib.extras.ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + * ValidClassTwo = function () + * { + * this.validMethodThree = function() {} + * // unnamed method + * this.validMethodFour = () {} + * } + * Database.prototype.validMethodThree = Database_getTodaysDate; + */ + + if ( is_inside_class ) + is_class = TRUE; + /* + * var can preceed an inner function + */ + if ( isKeyword(token, KEYWORD_var) ) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 ) + { + is_global = TRUE; + } + readToken(token); + } + + if ( isKeyword(token, KEYWORD_this) ) + { + readToken(token); + if (isType (token, TOKEN_PERIOD)) + { + readToken(token); + } + } + + copyToken(name, token); + + while (! isType (token, TOKEN_CLOSE_CURLY) && + ! isType (token, TOKEN_SEMICOLON) && + ! 
isType (token, TOKEN_EQUAL_SIGN) ) + { + /* Potentially the name of the function */ + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * Cannot be a global variable is it has dot references in the name + */ + is_global = FALSE; + do + { + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + if ( is_class ) + { + vStringCopy(saveScope, token->scope); + addToScope(token, name->string); + } + else + addContext (name, token); + } + else if ( isKeyword(token, KEYWORD_prototype) ) + { + /* + * When we reach the "prototype" tag, we infer: + * "BindAgent" is a class + * "build" is a method + * + * function BindAgent( repeatableIdName, newParentIdName ) { + * } + * + * CASE 1 + * Specified function name: "build" + * BindAgent.prototype.build = function( mode ) { + * ignore everything within this function + * } + * + * CASE 2 + * Prototype listing + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + * + */ + makeClassTag (name); + is_class = TRUE; + is_prototype = TRUE; + + /* + * There should a ".function_name" next. + */ + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * Handle CASE 1 + */ + readToken (token); + if ( isKeyword(token, KEYWORD_NONE) ) + { + vStringCopy(saveScope, token->scope); + addToScope(token, name->string); + + makeJsTag (token, JSTAG_METHOD); + /* + * We can read until the end of the block / statement. + * We need to correctly parse any nested blocks, but + * we do NOT want to create any tags based on what is + * within the blocks. 
+ */ + token->ignoreTag = TRUE; + /* + * Find to the end of the statement + */ + findCmdTerm (token); + token->ignoreTag = FALSE; + is_terminated = TRUE; + goto cleanUp; + } + } + else if (isType (token, TOKEN_EQUAL_SIGN)) + { + readToken (token); + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Handle CASE 2 + * + * Creates tags for each of these class methods + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + */ + parseMethods(token, name); + /* + * Find to the end of the statement + */ + findCmdTerm (token); + token->ignoreTag = FALSE; + is_terminated = TRUE; + goto cleanUp; + } + } + } + readToken (token); + } while (isType (token, TOKEN_PERIOD)); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if ( isType (token, TOKEN_OPEN_SQUARE) ) + skipArrayList(token); + + /* + if ( isType (token, TOKEN_OPEN_CURLY) ) + { + is_class = parseBlock (token, name); + } + */ + } + + if ( isType (token, TOKEN_CLOSE_CURLY) ) + { + /* + * Reaching this section without having + * processed an open curly brace indicates + * the statement is most likely not terminated. + */ + is_terminated = FALSE; + goto cleanUp; + } + + if ( isType (token, TOKEN_SEMICOLON) ) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 && is_global ) + { + /* + * Handles this syntax: + * var g_var2; + */ + if (isType (token, TOKEN_SEMICOLON)) + makeJsTag (name, JSTAG_VARIABLE); + } + /* + * Statement has ended. + * This deals with calls to functions, like: + * alert(..); + */ + goto cleanUp; + } + + if ( isType (token, TOKEN_EQUAL_SIGN) ) + { + readToken (token); + + if ( isKeyword (token, KEYWORD_function) ) + { + readToken (token); + + if ( isKeyword (token, KEYWORD_NONE) && + ! 
isType (token, TOKEN_OPEN_PAREN) ) + { + /* + * Functions of this format: + * var D2A = function theAdd(a, b) + * { + * return a+b; + * } + * Are really two separate defined functions and + * can be referenced in two ways: + * alert( D2A(1,2) ); // produces 3 + * alert( theAdd(1,2) ); // also produces 3 + * So it must have two tags: + * D2A + * theAdd + * Save the reference to the name for later use, once + * we have established this is a valid function we will + * create the secondary reference to it. + */ + copyToken(secondary_name, token); + readToken (token); + } + + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * This will be either a function or a class. + * We can only determine this by checking the body + * of the function. If we find a "this." we know + * it is a class, otherwise it is a function. + */ + if ( is_inside_class ) + { + makeJsTag (name, JSTAG_METHOD); + if ( vStringLength(secondary_name->string) > 0 ) + makeFunctionTag (secondary_name); + parseBlock (token, name); + } + else + { + is_class = parseBlock (token, name); + if ( is_class ) + makeClassTag (name); + else + makeFunctionTag (name); + + if ( vStringLength(secondary_name->string) > 0 ) + makeFunctionTag (secondary_name); + + /* + * Find to the end of the statement + */ + goto cleanUp; + } + } + } + else if (isType (token, TOKEN_OPEN_PAREN)) + { + /* + * Handle nameless functions + * this.method_name = () {} + */ + skipArgumentList(token); + + if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Nameless functions are only setup as methods. 
+ */ + makeJsTag (name, JSTAG_METHOD); + parseBlock (token, name); + } + } + else if (isType (token, TOKEN_OPEN_CURLY)) + { + /* + * Creates tags for each of these class methods + * ValidClassOne.prototype = { + * 'validMethodOne' : function(a,b) {}, + * 'validMethodTwo' : function(a,b) {} + * } + */ + parseMethods(token, name); + if (isType (token, TOKEN_CLOSE_CURLY)) + { + /* + * Assume the closing parantheses terminates + * this statements. + */ + is_terminated = TRUE; + } + } + else if (isKeyword (token, KEYWORD_new)) + { + readToken (token); + if ( isKeyword (token, KEYWORD_function) || + isKeyword (token, KEYWORD_capital_function) || + isKeyword (token, KEYWORD_object) || + isKeyword (token, KEYWORD_capital_object) ) + { + if ( isKeyword (token, KEYWORD_object) || + isKeyword (token, KEYWORD_capital_object) ) + is_class = TRUE; + + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + skipArgumentList(token); + + if (isType (token, TOKEN_SEMICOLON)) + { + if ( token->nestLevel == 0 ) + { + if ( is_class ) + { + makeClassTag (name); + } else { + makeFunctionTag (name); + } + } + } + } + } + else if (isKeyword (token, KEYWORD_NONE)) + { + /* + * Only create variables for global scope + */ + if ( token->nestLevel == 0 && is_global ) + { + /* + * A pointer can be created to the function. + * If we recognize the function/class name ignore the variable. + * This format looks identical to a variable definition. + * A variable defined outside of a block is considered + * a global variable: + * var g_var1 = 1; + * var g_var2; + * This is not a global variable: + * var g_var = function; + * This is a global variable: + * var g_var = different_var_name; + */ + fulltag = vStringNew (); + if (vStringLength (token->scope) > 0) + { + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + } + else + { + vStringCopy(fulltag, token->string); + } + vStringTerminate(fulltag); + if ( ! 
stringListHas(FunctionNames, vStringValue (fulltag)) && + ! stringListHas(ClassNames, vStringValue (fulltag)) ) + { + findCmdTerm (token); + if (isType (token, TOKEN_SEMICOLON)) + makeJsTag (name, JSTAG_VARIABLE); + } + vStringDelete (fulltag); + } + } + } + findCmdTerm (token); + + /* + * Statements can be optionally terminated in the case of + * statement prior to a close curly brace as in the + * document.write line below: + * + * function checkForUpdate() { + * if( 1==1 ) { + * document.write("hello from checkForUpdate
") + * } + * return 1; + * } + */ + if ( ! is_terminated && isType (token, TOKEN_CLOSE_CURLY)) + is_terminated = FALSE; + + +cleanUp: + vStringCopy(token->scope, saveScope); + deleteToken (name); + deleteToken (secondary_name); + vStringDelete(saveScope); + + return is_terminated; +} + +static boolean parseLine (tokenInfo *const token, boolean is_inside_class) +{ + boolean is_terminated = TRUE; + /* + * Detect the common statements, if, while, for, do, ... + * This is necessary since the last statement within a block "{}" + * can be optionally terminated. + * + * If the statement is not terminated, we need to tell + * the calling routine to prevent reading an additional token + * looking for the end of the statement. + */ + + if (isType(token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_for: + case KEYWORD_while: + case KEYWORD_do: + parseLoop (token); + break; + case KEYWORD_if: + case KEYWORD_else: + case KEYWORD_try: + case KEYWORD_catch: + case KEYWORD_finally: + /* Common semantics */ + is_terminated = parseIf (token); + break; + case KEYWORD_switch: + parseSwitch (token); + break; + default: + parseStatement (token, is_inside_class); + break; + } + } + else + { + /* + * Special case where single line statements may not be + * SEMICOLON terminated. parseBlock needs to know this + * so that it does not read the next token. 
+ */ + is_terminated = parseStatement (token, is_inside_class); + } + return is_terminated; +} + +static void parseJsFile (tokenInfo *const token) +{ + do + { + readToken (token); + + if (isType(token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_function: parseFunction (token); break; + default: parseLine (token, FALSE); break; + } + } + else + { + parseLine (token, FALSE); + } + } while (TRUE); +} + +static void initialize (const langType language) +{ + Assert (sizeof (JsKinds) / sizeof (JsKinds [0]) == JSTAG_COUNT); + Lang_js = language; + buildJsKeywordHash (); +} + +static void findJsTags (void) +{ + tokenInfo *const token = newToken (); + exception_t exception; + + ClassNames = stringListNew (); + FunctionNames = stringListNew (); + + exception = (exception_t) (setjmp (Exception)); + while (exception == ExceptionNone) + parseJsFile (token); + + stringListDelete (ClassNames); + stringListDelete (FunctionNames); + ClassNames = NULL; + FunctionNames = NULL; + deleteToken (token); +} + +/* Create parser definition stucture */ +extern parserDefinition* JavaScriptParser (void) +{ + static const char *const extensions [] = { "js", NULL }; + parserDefinition *const def = parserNew ("JavaScript"); + def->extensions = extensions; + /* + * New definitions for parsing instead of regex + */ + def->kinds = JsKinds; + def->kindCount = KIND_COUNT (JsKinds); + def->parser = findJsTags; + def->initialize = initialize; + + return def; +} +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/keyword.c b/keyword.c new file mode 100644 index 0000000..2a549d9 --- /dev/null +++ b/keyword.c @@ -0,0 +1,258 @@ +/* +* $Id: keyword.c 715 2009-07-06 03:31:00Z dhiebert $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Manages a keyword hash. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "debug.h" +#include "keyword.h" +#include "options.h" +#include "routines.h" + +/* +* MACROS +*/ +#define HASH_EXPONENT 7 /* must be less than 17 */ + +/* +* DATA DECLARATIONS +*/ +typedef struct sHashEntry { + struct sHashEntry *next; + const char *string; + langType language; + int value; +} hashEntry; + +/* +* DATA DEFINITIONS +*/ +static const unsigned int TableSize = 1 << HASH_EXPONENT; +static hashEntry **HashTable = NULL; + +/* +* FUNCTION DEFINITIONS +*/ + +static hashEntry **getHashTable (void) +{ + static boolean allocated = FALSE; + + if (! allocated) + { + unsigned int i; + + HashTable = xMalloc (TableSize, hashEntry*); + + for (i = 0 ; i < TableSize ; ++i) + HashTable [i] = NULL; + + allocated = TRUE; + } + return HashTable; +} + +static hashEntry *getHashTableEntry (unsigned long hashedValue) +{ + hashEntry **const table = getHashTable (); + hashEntry *entry; + + Assert (hashedValue < TableSize); + entry = table [hashedValue]; + + return entry; +} + +static unsigned long hashValue (const char *const string) +{ + unsigned long value = 0; + const unsigned char *p; + + Assert (string != NULL); + + /* We combine the various words of the multiword key using the method + * described on page 512 of Vol. 3 of "The Art of Computer Programming". + */ + for (p = (const unsigned char *) string ; *p != '\0' ; ++p) + { + value <<= 1; + if (value & 0x00000100L) + value = (value & 0x000000ffL) + 1L; + value ^= *p; + } + /* Algorithm from page 509 of Vol. 3 of "The Art of Computer Programming" + * Treats "value" as a 16-bit integer plus 16-bit fraction. 
+ */ + value *= 40503L; /* = 2^16 * 0.6180339887 ("golden ratio") */ + value &= 0x0000ffffL; /* keep fractional part */ + value >>= 16 - HASH_EXPONENT; /* scale up by hash size and move down */ + + return value; +} + +static hashEntry *newEntry ( + const char *const string, langType language, int value) +{ + hashEntry *const entry = xMalloc (1, hashEntry); + + entry->next = NULL; + entry->string = string; + entry->language = language; + entry->value = value; + + return entry; +} + +/* Note that it is assumed that a "value" of zero means an undefined keyword + * and clients of this function should observe this. Also, all keywords added + * should be added in lower case. If we encounter a case-sensitive language + * whose keywords are in upper case, we will need to redesign this. + */ +extern void addKeyword (const char *const string, langType language, int value) +{ + const unsigned long hashedValue = hashValue (string); + hashEntry *entry = getHashTableEntry (hashedValue); + + if (entry == NULL) + { + hashEntry **const table = getHashTable (); + table [hashedValue] = newEntry (string, language, value); + } + else + { + hashEntry *prev = NULL; + + while (entry != NULL) + { + if (language == entry->language && + strcmp (string, entry->string) == 0) + { + Assert (("Already in table" == NULL)); + } + prev = entry; + entry = entry->next; + } + if (entry == NULL) + { + Assert (prev != NULL); + prev->next = newEntry (string, language, value); + } + } +} + +extern int lookupKeyword (const char *const string, langType language) +{ + const unsigned long hashedValue = hashValue (string); + hashEntry *entry = getHashTableEntry (hashedValue); + int result = -1; + + while (entry != NULL) + { + if (language == entry->language && strcmp (string, entry->string) == 0) + { + result = entry->value; + break; + } + entry = entry->next; + } + return result; +} + +extern void freeKeywordTable (void) +{ + if (HashTable != NULL) + { + unsigned int i; + + for (i = 0 ; i < TableSize ; ++i) + 
{ + hashEntry *entry = HashTable [i]; + + while (entry != NULL) + { + hashEntry *next = entry->next; + eFree (entry); + entry = next; + } + } + eFree (HashTable); + } +} + +extern int analyzeToken (vString *const name, langType language) +{ + vString *keyword = vStringNew (); + int result; + vStringCopyToLower (keyword, name); + result = lookupKeyword (vStringValue (keyword), language); + vStringDelete (keyword); + return result; +} + +#ifdef DEBUG + +static void printEntry (const hashEntry *const entry) +{ + printf (" %-15s %-7s\n", entry->string, getLanguageName (entry->language)); +} + +static unsigned int printBucket (const unsigned int i) +{ + hashEntry **const table = getHashTable (); + hashEntry *entry = table [i]; + unsigned int measure = 1; + boolean first = TRUE; + + printf ("%2d:", i); + if (entry == NULL) + printf ("\n"); + else while (entry != NULL) + { + if (! first) + printf (" "); + else + { + printf (" "); + first = FALSE; + } + printEntry (entry); + entry = entry->next; + measure = 2 * measure; + } + return measure - 1; +} + +extern void printKeywordTable (void) +{ + unsigned long emptyBucketCount = 0; + unsigned long measure = 0; + unsigned int i; + + for (i = 0 ; i < TableSize ; ++i) + { + const unsigned int pass = printBucket (i); + + measure += pass; + if (pass == 0) + ++emptyBucketCount; + } + + printf ("spread measure = %ld\n", measure); + printf ("%ld empty buckets\n", emptyBucketCount); +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/keyword.h b/keyword.h new file mode 100644 index 0000000..e10bbfd --- /dev/null +++ b/keyword.h @@ -0,0 +1,34 @@ +/* +* $Id: keyword.h 658 2008-04-20 23:21:35Z elliotth $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* External interface to keyword.c +*/ +#ifndef _KEYWORD_H +#define _KEYWORD_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "parse.h" + +/* +* FUNCTION PROTOTYPES +*/ +extern void addKeyword (const char *const string, langType language, int value); +extern int lookupKeyword (const char *const string, langType language); +extern void freeKeywordTable (void); +#ifdef DEBUG +extern void printKeywordTable (void); +#endif +extern int analyzeToken (vString *const name, langType language); + +#endif /* _KEYWORD_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/lisp.c b/lisp.c new file mode 100644 index 0000000..6fdc4dd --- /dev/null +++ b/lisp.c @@ -0,0 +1,139 @@ +/* +* $Id: lisp.c 717 2009-07-07 03:40:50Z dhiebert $ +* +* Copyright (c) 2000-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for LISP files. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "parse.h" +#include "read.h" +#include "vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_FUNCTION +} lispKind; + +static kindOption LispKinds [] = { + { TRUE, 'f', "function", "functions" } +}; + +/* +* FUNCTION DEFINITIONS +*/ + +/* + * lisp tag functions + * look for (def or (DEF, quote or QUOTE + */ +static int L_isdef (const unsigned char *strp) +{ + return ( (strp [1] == 'd' || strp [1] == 'D') + && (strp [2] == 'e' || strp [2] == 'E') + && (strp [3] == 'f' || strp [3] == 'F')); +} + +static int L_isquote (const unsigned char *strp) +{ + return ( (*(++strp) == 'q' || *strp == 'Q') + && (*(++strp) == 'u' || *strp == 'U') + && (*(++strp) == 'o' || *strp == 'O') + && (*(++strp) == 't' || *strp == 'T') + && (*(++strp) == 'e' || *strp == 'E') + && isspace (*(++strp))); +} + +static void L_getit (vString *const name, const unsigned char *dbp) +{ + const unsigned char *p; + + if (*dbp == '\'') /* Skip prefix quote */ + dbp++; + else if (*dbp == '(' && L_isquote (dbp)) /* Skip "(quote " */ + { + dbp += 7; + while (isspace (*dbp)) + dbp++; + } + for (p=dbp ; *p!='\0' && *p!='(' && !isspace ((int) *p) && *p!=')' ; p++) + vStringPut (name, *p); + vStringTerminate (name); + + if (vStringLength (name) > 0) + makeSimpleTag (name, LispKinds, K_FUNCTION); + vStringClear (name); +} + +/* Algorithm adapted from from GNU etags. + */ +static void findLispTags (void) +{ + vString *name = vStringNew (); + const unsigned char* p; + + + while ((p = fileReadLine ()) != NULL) + { + if (*p == '(') + { + if (L_isdef (p)) + { + while (*p != '\0' && !isspace ((int) *p)) + p++; + while (isspace ((int) *p)) + p++; + L_getit (name, p); + } + else + { + /* Check for (foo::defmumble name-defined ... 
*/ + do + p++; + while (*p != '\0' && !isspace ((int) *p) + && *p != ':' && *p != '(' && *p != ')'); + if (*p == ':') + { + do + p++; + while (*p == ':'); + + if (L_isdef (p - 1)) + { + while (*p != '\0' && !isspace ((int) *p)) + p++; + while (isspace (*p)) + p++; + L_getit (name, p); + } + } + } + } + } + vStringDelete (name); +} + +extern parserDefinition* LispParser (void) +{ + static const char *const extensions [] = { + "cl", "clisp", "el", "l", "lisp", "lsp", NULL + }; + parserDefinition* def = parserNew ("Lisp"); + def->kinds = LispKinds; + def->kindCount = KIND_COUNT (LispKinds); + def->extensions = extensions; + def->parser = findLispTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/lregex.c b/lregex.c new file mode 100644 index 0000000..59f5df6 --- /dev/null +++ b/lregex.c @@ -0,0 +1,704 @@ +/* +* $Id: lregex.c 576 2007-06-30 04:16:23Z elliotth $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for applying regular expression matching. +* +* The code for utlizing the Gnu regex package with regards to processing the +* regex option and checking for regex matches was adapted from routines in +* Gnu etags. 
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+#include <string.h>
+
+#ifdef HAVE_REGCOMP
+# include <ctype.h>
+# include <stddef.h>
+# ifdef HAVE_SYS_TYPES_H
+#  include <sys/types.h>  /* declare off_t (not known to regex.h on FreeBSD) */
+# endif
+# include <regex.h>
+#endif
+
+#include "debug.h"
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+
+#ifdef HAVE_REGEX
+
+/*
+*   MACROS
+*/
+
+/* Back-references \0 through \9 */
+#define BACK_REFERENCE_COUNT 10
+
+#if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
+# define POSIX_REGEX
+#endif
+
+#define REGEX_NAME "Regex"
+
+/*
+*   DATA DECLARATIONS
+*/
+#if defined (POSIX_REGEX)
+
+/* Kind attached to every tag produced by one tag pattern. */
+struct sKind {
+	boolean enabled;
+	char letter;
+	char* name;
+	char* description;
+};
+
+enum pType { PTRN_TAG, PTRN_CALLBACK };
+
+/* One compiled pattern; `type' selects the active member of `u'. */
+typedef struct {
+	regex_t *pattern;
+	enum pType type;
+	union {
+		struct {
+			char *name_pattern;
+			struct sKind kind;
+		} tag;
+		struct {
+			regexCallback function;
+		} callback;
+	} u;
+} regexPattern;
+
+#endif
+
+typedef struct {
+	regexPattern *patterns;
+	unsigned int count;
+} patternSet;
+
+/*
+*   DATA DEFINITIONS
+*/
+
+static boolean regexBroken = FALSE;
+
+/* Array of pattern sets, indexed by language */
+static patternSet* Sets = NULL;
+static int SetUpper = -1;  /* upper language index in list */
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/* Releases every pattern registered for `language' and leaves its set
+ * empty. Does nothing when no set was ever created for that language.
+ */
+static void clearPatternSet (const langType language)
+{
+	if (language <= SetUpper)
+	{
+		patternSet* const set = Sets + language;
+		unsigned int i;
+		for (i = 0 ; i < set->count ; ++i)
+		{
+			regexPattern *p = &set->patterns [i];
+#if defined (POSIX_REGEX)
+			regfree (p->pattern);
+#endif
+			eFree (p->pattern);
+			p->pattern = NULL;
+
+			/* only tag patterns own heap-allocated strings */
+			if (p->type == PTRN_TAG)
+			{
+				eFree (p->u.tag.name_pattern);
+				p->u.tag.name_pattern = NULL;
+				eFree (p->u.tag.kind.name);
+				p->u.tag.kind.name = NULL;
+				if (p->u.tag.kind.description != NULL)
+				{
+					eFree (p->u.tag.kind.description);
+					p->u.tag.kind.description = NULL;
+				}
+			}
+		}
+		if (set->patterns != NULL)
+			eFree (set->patterns);
+		set->patterns = NULL;
+		set->count = 0;
+	}
+}
+
+/*
+*   Regex pseudo-parser
+*/
+
+/* Emits a tag entry called `name' carrying the letter and name of `kind',
+ * unless that kind is currently disabled.
+ */
+static void makeRegexTag (
+		const vString* const name, const struct sKind* const kind)
+{
+	/* validate before the first dereference, not after it */
+	Assert (kind != NULL);
+	if (kind->enabled)
+	{
+		tagEntryInfo e;
+		Assert (name != NULL && vStringLength (name) > 0);
+		initTagEntry (&e, vStringValue (name));
+		e.kind = kind->letter;
+		e.kindName = kind->name;
+		makeTagEntry (&e);
+	}
+}
+
+/*
+*   Regex pattern definition
+*/
+
+/* Take a string like "/blah/" and turn it into "blah", making sure
+ * that the first and last characters are the same, and handling
+ * quoted separator characters.  Actually, stops on the occurrence of
+ * an unquoted separator.  Also turns "\t" into a Tab character.
+ * Returns pointer to terminating separator.  Works in place.  Null
+ * terminates name string.
+ */
+static char* scanSeparators (char* name)
+{
+	char sep = name [0];
+	char *copyto = name;
+	boolean quoted = FALSE;
+
+	for (++name ; *name != '\0' ; ++name)
+	{
+		if (quoted)
+		{
+			if (*name == sep)
+				*copyto++ = sep;
+			else if (*name == 't')
+				*copyto++ = '\t';
+			else
+			{
+				/* Something else is quoted, so preserve the quote. */
+				*copyto++ = '\\';
+				*copyto++ = *name;
+			}
+			quoted = FALSE;
+		}
+		else if (*name == '\\')
+			quoted = TRUE;
+		else if (*name == sep)
+		{
+			break;
+		}
+		else
+			*copyto++ = *name;
+	}
+	*copyto = '\0';
+	return name;
+}
+
+/* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
+ * character is whatever the first character of `regexp' is), by breaking it
+ * up into null terminated strings, removing the separators, and expanding
+ * '\t' into tabs. When complete, `regexp' points to the line matching
+ * pattern, a pointer to the name matching pattern is written to `name', a
+ * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
+ * to the trailing flags is written to `flags'. If the pattern is not in the
+ * correct format, a false value is returned.
+ */
+static boolean parseTagRegex (
+		char* const regexp, char** const name,
+		char** const kinds, char** const flags)
+{
+	boolean result = FALSE;
+	const int separator = (unsigned char) regexp [0];
+
+	*name = scanSeparators (regexp);
+	if (*regexp == '\0')
+		error (WARNING, "empty regexp");
+	else if ((unsigned char) **name != separator)
+		error (WARNING, "%s: incomplete regexp", regexp);
+	else
+	{
+		char* const third = scanSeparators (*name);
+		if (**name == '\0')
+			error (WARNING, "%s: regexp missing name pattern", regexp);
+		/* only look at the last character when there is one; an empty
+		 * name would otherwise be indexed at -1 */
+		else if ((*name) [strlen (*name) - 1] == '\\')
+			error (WARNING, "error in name pattern: \"%s\"", *name);
+		if ((unsigned char) *third != separator)
+			error (WARNING, "%s: regexp missing final separator", regexp);
+		else
+		{
+			char* const fourth = scanSeparators (third);
+			if ((unsigned char) *fourth == separator)
+			{
+				*kinds = third;
+				scanSeparators (fourth);
+				*flags = fourth;
+			}
+			else
+			{
+				*flags = third;
+				*kinds = NULL;
+			}
+			result = TRUE;
+		}
+	}
+	return result;
+}
+
+/* Appends a tag pattern to the set of `language', growing the table of
+ * sets first when `language' lies beyond it. The stored entry keeps the
+ * `pattern', `name', `kindName' and `description' pointers it is given;
+ * clearPatternSet() frees them.
+ */
+static void addCompiledTagPattern (
+		const langType language, regex_t* const pattern,
+		char* const name, const char kind, char* const kindName,
+		char *const description)
+{
+	patternSet* set;
+	regexPattern *ptrn;
+	if (language > SetUpper)
+	{
+		int i;
+		Sets = xRealloc (Sets, (language + 1), patternSet);
+		for (i = SetUpper + 1 ; i <= language ; ++i)
+		{
+			Sets [i].patterns = NULL;
+			Sets [i].count = 0;
+		}
+		SetUpper = language;
+	}
+	set = Sets + language;
+	set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
+	ptrn = &set->patterns [set->count];
+	set->count += 1;
+
+	ptrn->pattern = pattern;
+	ptrn->type = PTRN_TAG;
+	ptrn->u.tag.name_pattern = name;
+	ptrn->u.tag.kind.enabled = TRUE;
+	ptrn->u.tag.kind.letter = kind;
+	ptrn->u.tag.kind.name = kindName;
+	ptrn->u.tag.kind.description = description;
+}
+
+/* Appends a callback pattern to the set of `language', growing the table
+ * of sets first when `language' lies beyond it.
+ */
+static void addCompiledCallbackPattern (
+		const langType language, regex_t* const pattern,
+		const regexCallback callback)
+{
+	patternSet* set;
+	regexPattern *ptrn;
+	if (language > SetUpper)
+	{
+		int i;
+		Sets = xRealloc (Sets, (language + 1), patternSet);
+		for (i = SetUpper + 1 ; i <= language ; ++i)
+		{
+			Sets [i].patterns = NULL;
+			Sets [i].count = 0;
+		}
+		SetUpper = language;
+	}
+	set = Sets + language;
+	set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
+	ptrn = &set->patterns [set->count];
+	set->count += 1;
+
+	ptrn->pattern = pattern;
+	ptrn->type = PTRN_CALLBACK;
+	ptrn->u.callback.function = callback;
+}
+
+#if defined (POSIX_REGEX)
+
+/* Compiles `regexp' into a newly allocated regex_t. `flags' may be NULL;
+ * 'b' selects basic syntax, 'e' extended syntax (the default) and 'i'
+ * case-insensitive matching. Returns NULL after a warning when the
+ * expression does not compile.
+ */
+static regex_t* compileRegex (const char* const regexp, const char* const flags)
+{
+	int cflags = REG_EXTENDED | REG_NEWLINE;
+	regex_t *result = NULL;
+	int errcode;
+	int i;
+	for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
+	{
+		switch ((int) flags [i])
+		{
+			case 'b': cflags &= ~REG_EXTENDED; break;
+			case 'e': cflags |= REG_EXTENDED; break;
+			case 'i': cflags |= REG_ICASE; break;
+			/* report the offending flag, not always the first one */
+			default: error (WARNING, "unknown regex flag: '%c'", flags [i]); break;
+		}
+	}
+	result = xMalloc (1, regex_t);
+	errcode = regcomp (result, regexp, cflags);
+	if (errcode != 0)
+	{
+		char errmsg[256];
+		regerror (errcode, result, errmsg, 256);
+		error (WARNING, "regcomp %s: %s", regexp, errmsg);
+		regfree (result);
+		eFree (result);
+		result = NULL;
+	}
+	return result;
+}
+
+#endif
+
+/* Splits a kind specification of the form "k,name,description" (each part
+ * optional). Writes the kind letter to `kind' and newly allocated strings
+ * to `kindName' and `description'; `description' stays NULL when absent.
+ * A missing letter defaults to 'r' and a missing name to "regex".
+ */
+static void parseKinds (
+		const char* const kinds, char* const kind, char** const kindName,
+		char **description)
+{
+	*kind = '\0';
+	*kindName = NULL;
+	*description = NULL;
+	if (kinds == NULL || kinds [0] == '\0')
+	{
+		*kind = 'r';
+		*kindName = eStrdup ("regex");
+	}
+	else if (kinds [0] != '\0')
+	{
+		const char* k = kinds;
+		if (k [0] != ',' && (k [1] == ',' || k [1] == '\0'))
+			*kind = *k++;
+		else
+			*kind = 'r';
+		if (*k == ',')
+			++k;
+		if (k [0] == '\0')
+			*kindName = eStrdup ("regex");
+		else
+		{
+			const char *const comma = strchr (k, ',');
+			if (comma == NULL)
+				*kindName = eStrdup (k);
+			else
+			{
+				*kindName = (char*) eMalloc (comma - k + 1);
+				strncpy (*kindName, k, comma - k);
+				(*kindName) [comma - k] = '\0';
+				k = comma + 1;
+				if (k [0] != '\0')
+					*description = eStrdup (k);
+			}
+		}
+	}
+}
+
+/* Prints one line describing the kind of tag pattern `pat [i]'. */
+static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
+{
+	const struct sKind *const kind = &pat [i].u.tag.kind;
+	const char *const indentation = indent ? " " : "";
+	Assert (pat [i].type == PTRN_TAG);
+	printf ("%s%c %s %s\n", indentation,
+			kind->letter != '\0' ? kind->letter : '?',
+			kind->description != NULL ? kind->description : kind->name,
+			kind->enabled ? "" : " [off]");
+}
+
+/* Handles the parameter of a --regex-<LANG> option: an empty parameter
+ * clears the patterns of `language', "@file" reads one pattern per line
+ * from `file', and anything else is taken as a single pattern.
+ */
+static void processLanguageRegex (const langType language,
+		const char* const parameter)
+{
+	if (parameter == NULL || parameter [0] == '\0')
+		clearPatternSet (language);
+	else if (parameter [0] != '@')
+		addLanguageRegex (language, parameter);
+	else if (! doesFileExist (parameter + 1))
+		error (WARNING, "cannot open regex file");
+	else
+	{
+		const char* regexfile = parameter + 1;
+		FILE* const fp = fopen (regexfile, "r");
+		if (fp == NULL)
+			/* never use a user-supplied file name as the format string */
+			error (WARNING | PERROR, "%s", regexfile);
+		else
+		{
+			vString* const regex = vStringNew ();
+			while (readLine (regex, fp))
+				addLanguageRegex (language, vStringValue (regex));
+			fclose (fp);
+			vStringDelete (regex);
+		}
+	}
+}
+
+/*
+*   Regex pattern matching
+*/
+
+#if defined (POSIX_REGEX)
+
+/* Expands the name pattern `out' into a new vString. "\1" through "\9"
+ * are replaced by the corresponding submatch of `in'; any other escaped
+ * character is copied without its backslash; newlines and carriage
+ * returns are dropped.
+ */
+static vString* substitute (
+		const char* const in, const char* out,
+		const int nmatch, const regmatch_t* const pmatch)
+{
+	vString* result = vStringNew ();
+	const char* p;
+	for (p = out ; *p != '\0' ; p++)
+	{
+		if (*p == '\\')
+		{
+			++p;
+			/* a trailing backslash has nothing to escape; stop here so
+			 * the loop increment cannot step past the terminator */
+			if (*p == '\0')
+				break;
+			if (isdigit ((unsigned char) *p))
+			{
+				const int dig = *p - '0';
+				if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)
+				{
+					const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
+					vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
+				}
+				continue;
+			}
+		}
+		if (*p != '\n' && *p != '\r')
+			vStringPut (result, *p);
+	}
+	vStringTerminate (result);
+	return result;
+}
+
+/* Builds the tag name for a matched tag pattern and emits the tag; warns
+ * when the name pattern expands to nothing.
+ */
+static void matchTagPattern (const vString* const line,
+		const regexPattern* const patbuf,
+		const regmatch_t* const pmatch)
+{
+	vString *const name = substitute (vStringValue (line),
+			patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
+	vStringStripLeading (name);
+	vStringStripTrailing (name);
+	if (vStringLength (name) > 0)
+		makeRegexTag (name, &patbuf->u.tag.kind);
+	else
+		error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
+			getInputFileName (), getInputLineNumber (),
+			patbuf->u.tag.name_pattern);
+	vStringDelete (name);
+}
+
+/* Converts the leading run of valid submatches into regexMatch records
+ * and hands them, together with the line, to the pattern's callback.
+ */
+static void matchCallbackPattern (
+		const vString* const line, const regexPattern* const patbuf,
+		const regmatch_t* const pmatch)
+{
+	regexMatch matches [BACK_REFERENCE_COUNT];
+	unsigned int count = 0;
+	int i;
+	for (i = 0 ; i < BACK_REFERENCE_COUNT && pmatch [i].rm_so != -1 ; ++i)
+	{
+		matches [i].start = pmatch [i].rm_so;
+		matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
+		++count;
+	}
+	patbuf->u.callback.function (vStringValue (line), matches, count);
+}
+
+/* Runs one pattern against `line' and dispatches on the pattern type.
+ * Returns TRUE when the pattern matched.
+ */
+static boolean matchRegexPattern (const vString* const line,
+		const regexPattern* const patbuf)
+{
+	boolean result = FALSE;
+	regmatch_t pmatch [BACK_REFERENCE_COUNT];
+	const int match = regexec (patbuf->pattern, vStringValue (line),
+							   BACK_REFERENCE_COUNT, pmatch, 0);
+	if (match == 0)
+	{
+		result = TRUE;
+		if (patbuf->type == PTRN_TAG)
+			matchTagPattern (line, patbuf, pmatch);
+		else if (patbuf->type == PTRN_CALLBACK)
+			matchCallbackPattern (line, patbuf, pmatch);
+		else
+		{
+			Assert ("invalid pattern type" == NULL);
+			result = FALSE;
+		}
+	}
+	return result;
+}
+
+#endif
+
+/* PUBLIC INTERFACE */
+
+/* Match against all patterns for specified language. Returns true if at least
+ * one pattern matched.
+ */
+extern boolean matchRegex (const vString* const line, const langType language)
+{
+	boolean result = FALSE;
+	if (language != LANG_IGNORE && language <= SetUpper &&
+		Sets [language].count > 0)
+	{
+		const patternSet* const set = Sets + language;
+		unsigned int i;
+		for (i = 0 ; i < set->count ; ++i)
+			if (matchRegexPattern (line, set->patterns + i))
+				result = TRUE;
+	}
+	return result;
+}
+
+extern void findRegexTags (void)
+{
+	/* merely read all lines of the file */
+	while (fileReadLine () != NULL)
+		;
+}
+
+#endif  /* HAVE_REGEX */
+
+/* Compiles `regex' with `flags' and registers it as a tag pattern for
+ * `language'. `name' is copied; `kinds' is parsed by parseKinds(). Does
+ * nothing when regex support is missing, broken, or the expression does
+ * not compile.
+ */
+extern void addTagRegex (
+		const langType language __unused__,
+		const char* const regex __unused__,
+		const char* const name __unused__,
+		const char* const kinds __unused__,
+		const char* const flags __unused__)
+{
+#ifdef HAVE_REGEX
+	Assert (regex != NULL);
+	Assert (name != NULL);
+	if (! regexBroken)
+	{
+		regex_t* const cp = compileRegex (regex, flags);
+		if (cp != NULL)
+		{
+			char kind;
+			char* kindName;
+			char* description;
+			parseKinds (kinds, &kind, &kindName, &description);
+			addCompiledTagPattern (language, cp, eStrdup (name),
+					kind, kindName, description);
+		}
+	}
+#endif
+}
+
+/* Compiles `regex' with `flags' and registers `callback' to be invoked
+ * for every line of `language' that matches it.
+ */
+extern void addCallbackRegex (
+		const langType language __unused__,
+		const char* const regex __unused__,
+		const char* const flags __unused__,
+		const regexCallback callback __unused__)
+{
+#ifdef HAVE_REGEX
+	Assert (regex != NULL);
+	if (! regexBroken)
+	{
+		regex_t* const cp = compileRegex (regex, flags);
+		if (cp != NULL)
+			addCompiledCallbackPattern (language, cp, callback);
+	}
+#endif
+}
+
+/* Registers a pattern given in the combined "/regex/name/[kinds/]flags"
+ * form. The specification is parsed on a private copy, so `regex' itself
+ * is left untouched.
+ */
+extern void addLanguageRegex (
+		const langType language __unused__, const char* const regex __unused__)
+{
+#ifdef HAVE_REGEX
+	if (! regexBroken)
+	{
+		char *const regex_pat = eStrdup (regex);
+		char *name, *kinds, *flags;
+		if (parseTagRegex (regex_pat, &name, &kinds, &flags))
+			addTagRegex (language, regex_pat, name, kinds, flags);
+		/* release the working copy on the failure path as well */
+		eFree (regex_pat);
+	}
+#endif
+}
+
+/*
+*   Regex option parsing
+*/
+
+/* Handles an option of the form "regex-<LANG>". Returns TRUE when the
+ * option was recognized as such, even if it could not be applied.
+ */
+extern boolean processRegexOption (const char *const option,
+								   const char *const parameter __unused__)
+{
+	boolean handled = FALSE;
+	const char* const dash = strchr (option, '-');
+	if (dash != NULL && strncmp (option, "regex", dash - option) == 0)
+	{
+#ifdef HAVE_REGEX
+		langType language;
+		language = getNamedLanguage (dash + 1);
+		if (language == LANG_IGNORE)
+			error (WARNING, "unknown language \"%s\" in --%s option", (dash + 1), option);
+		else
+			processLanguageRegex (language, parameter);
+#else
+		error (WARNING, "regex support not available; required for --%s option",
+		   option);
+#endif
+		handled = TRUE;
+	}
+	return handled;
+}
+
+/* Disables the kind of every tag pattern registered for `language'. */
+extern void disableRegexKinds (const langType language __unused__)
+{
+#ifdef HAVE_REGEX
+	if (language <= SetUpper && Sets [language].count > 0)
+	{
+		patternSet* const set = Sets + language;
+		unsigned int i;
+		for (i = 0 ; i < set->count ; ++i)
+			if (set->patterns [i].type == PTRN_TAG)
+				set->patterns [i].u.tag.kind.enabled = FALSE;
+	}
+#endif
+}
+
+/* Sets the enabled state of every tag pattern of `language' whose kind
+ * letter is `kind' to `mode'. Returns TRUE when at least one pattern
+ * carried that letter.
+ */
+extern boolean enableRegexKind (
+		const langType language __unused__,
+		const int kind __unused__, const boolean mode __unused__)
+{
+	boolean result = FALSE;
+#ifdef HAVE_REGEX
+	if (language <= SetUpper && Sets [language].count > 0)
+	{
+		patternSet* const set = Sets + language;
+		unsigned int i;
+		for (i = 0 ; i < set->count ; ++i)
+			if (set->patterns [i].type == PTRN_TAG &&
+				set->patterns [i].u.tag.kind.letter == kind)
+			{
+				set->patterns [i].u.tag.kind.enabled = mode;
+				result = TRUE;
+			}
+	}
+#endif
+	return result;
+}
+
+/* Prints the kind of every tag pattern registered for `language'. */
+extern void printRegexKinds (const langType language __unused__, boolean indent __unused__)
+{
+#ifdef HAVE_REGEX
+	if (language <= SetUpper && Sets [language].count > 0)
+	{
+		patternSet* const set = Sets + language;
+		unsigned int i;
+		for (i = 0 ; i < set->count ; ++i)
+			if (set->patterns [i].type == PTRN_TAG)
+				printRegexKind (set->patterns, i, indent);
+	}
+#endif
+}
+
+/* Releases every pattern set and the table that holds them. */
+extern void freeRegexResources (void)
+{
+#ifdef HAVE_REGEX
+	int i;
+	for (i = 0 ; i <= SetUpper ; ++i)
+		clearPatternSet (i);
+	if (Sets != NULL)
+		eFree (Sets);
+	Sets = NULL;
+	SetUpper = -1;
+#endif
+}
+
+/* Check for broken regcomp() on Cygwin */
+extern void checkRegex (void)
+{
+#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
+	regex_t patbuf;
+	if (regcomp (&patbuf, "/hello/", 0) != 0)
+	{
+		error (WARNING, "Disabling broken regex");
+		regexBroken = TRUE;
+	}
+	else
+		regfree (&patbuf);  /* the probe pattern is not needed afterwards */
+#endif
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/lua.c b/lua.c
new file mode 100644
index 0000000..d385544
--- /dev/null
+++ b/lua.c
@@ -0,0 +1,133 @@
+/*
+*   $Id: lua.c 443 2006-05-30 04:37:13Z darren $
+*
+*   Copyright (c) 2000-2001, Max Ischenko .
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions for generating tags for Lua language.
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+#include <string.h>
+
+#include "options.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+*   DATA DEFINITIONS
+*/
+typedef enum {
+	K_FUNCTION
+} luaKind;
+
+static kindOption LuaKinds [] = {
+	{ TRUE, 'f', "function", "functions" }
+};
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/* for debugging purposes */
+static void __unused__ print_string (char *p, char *q)
+{
+	for ( ; p != q; p++)
+		fprintf (errout, "%c", *p);
+	fprintf (errout, "\n");
+}
+
+/*
+ * Helper function.
+ * Returns 1 if line looks like a line of Lua code.
+ *
+ * TODO: Recognize UNIX bang notation.
+ * (Lua treat first line as a comment if it starts with #!)
+ *
+ */
+static boolean is_a_code_line (const unsigned char *line)
+{
+	boolean result;
+	const unsigned char *p = line;
+	while (isspace ((int) *p))
+		p++;
+	if (p [0] == '\0')
+		result = FALSE;
+	else if (p [0] == '-' && p [1] == '-')
+		result = FALSE;
+	else
+		result = TRUE;
+	return result;
+}
+
+/* Emits a function tag for the text in [begin, end) after skipping
+ * leading white space. `name' is scratch storage and is left empty.
+ * Does nothing when either bound is NULL or the range is empty.
+ */
+static void extract_name (const char *begin, const char *end, vString *name)
+{
+	if (begin != NULL && end != NULL && begin < end)
+	{
+		const char *cp;
+
+		/* <ctype.h> functions require a value representable as
+		 * unsigned char; plain char may be negative */
+		while (isspace ((unsigned char) *begin))
+			begin++;
+		while (isspace ((unsigned char) *end))
+			end--;
+		if (begin < end)
+		{
+			for (cp = begin ; cp != end; cp++)
+				vStringPut (name, (int) *cp);
+			vStringTerminate (name);
+
+			makeSimpleTag (name, LuaKinds, K_FUNCTION);
+			vStringClear (name);
+		}
+	}
+}
+
+/* Scans the input line by line. A code line containing "function" yields
+ * a tag named by the text left of the first '=' when there is one, and
+ * otherwise by the text between the keyword and the next '('.
+ */
+static void findLuaTags (void)
+{
+	vString *name = vStringNew ();
+	const unsigned char *line;
+
+	while ((line = fileReadLine ()) != NULL)
+	{
+		const char *p, *q;
+
+		if (! is_a_code_line (line))
+			continue;
+
+		p = (const char*) strstr ((const char*) line, "function");
+		if (p == NULL)
+			continue;
+
+		q = strchr ((const char*) line, '=');
+
+		if (q == NULL) {
+			p = p + 8;  /* skip the `function' word */
+			/* also skip the character after it, but never step over the
+			 * terminator when the line ends with the keyword */
+			if (*p != '\0')
+				++p;
+			q = strchr ((const char*) p, '(');
+			extract_name (p, q, name);
+		} else {
+			p = (const char*) &line[0];
+			extract_name (p, q, name);
+		}
+	}
+	vStringDelete (name);
+}
+
+extern parserDefinition* LuaParser (void)
+{
+	static const char* const extensions [] = { "lua", NULL };
+	parserDefinition* def = parserNew ("Lua");
+	def->kinds = LuaKinds;
+	def->kindCount = KIND_COUNT (LuaKinds);
+	def->extensions = extensions;
+	def->parser = findLuaTags;
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/mac.c b/mac.c
new file mode 100644
index 0000000..af4d16f
--- /dev/null
+++ b/mac.c
@@ -0,0 +1,273 @@
+/*
+*   $Id: mac.c 443 2006-05-30 04:37:13Z darren $
+*
+*   Copyright (c) 2001, Maarten L. Hekkelman
+*
+*   Author: Maarten L.
Hekkelman
+*           http://www.hekkelman.com
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License. It is provided on an as-is basis and no
+*   responsibility is accepted for its failure to perform as expected.
+*
+*   This module contains support functions for Exuberant Ctags on Macintosh.
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"
+
+#include <Files.h>
+#include <TextUtils.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/* Converts the slash-separated path `in_unix_path' into a colon-separated
+ * length-prefixed string in `out_mac_path' (byte 0 holds the length).
+ * Returns 0 on success, or -1 without touching the output when the input
+ * is longer than 254 characters.
+ */
+static int get_path(const char* in_unix_path, unsigned char* out_mac_path)
+{
+	int l = strlen(in_unix_path);
+	int result = 0;
+
+	if (l > 254)
+		result = -1;
+	else
+	{
+		const char* s = in_unix_path;
+		char *d = (char*)out_mac_path + 1;
+
+		/* a path without a leading '/' gets a leading ':' */
+		if (*s != '/')
+			*d++ = ':';
+		else
+			++s;
+
+		while (*s)
+		{
+			if (s[0] == '.' && s[1] == '.' && s[2] == '/')
+			{
+				s += 3;
+				*d++ = ':';
+			}
+			else if (s[0] == '.' && s[1] == '/')
+				s += 2;
+			else if (s[0] == '/')
+			{
+				*d++ = ':';
+
+				++s;
+				while (*s == '/')
+					++s;
+			}
+			else
+				*d++ = *s++;
+		}
+
+		out_mac_path[0] = (d - (char*)out_mac_path) - 1;
+	}
+
+	return result;
+}
+
+/* Opens `dirname' for reading with readdir(). Returns NULL when memory is
+ * exhausted, the path is too long to convert, or it is not a directory.
+ */
+DIR *opendir(const char *dirname)
+{
+	DIR* dirp = (DIR*)calloc(1, sizeof(DIR));
+
+	if (dirp != NULL)
+	{
+		OSErr err = noErr;
+		Str255 s;
+		CInfoPBRec pb = { 0 };
+		int bad_path = 0;
+
+		if (strcmp(dirname, "."))
+		{
+			/* on failure `s' is left uninitialized and must not be used */
+			bad_path = (get_path(dirname, s) != 0);
+			pb.hFileInfo.ioNamePtr = s;
+		}
+		else
+			pb.hFileInfo.ioNamePtr = NULL;
+
+		if (! bad_path)
+			err = PBGetCatInfoSync(&pb);
+		if (bad_path || err != noErr || (pb.hFileInfo.ioFlAttrib & ioDirMask) == 0)
+		{
+			free(dirp);
+			dirp = NULL;
+		}
+		else
+		{
+			dirp->file.vRefNum = pb.hFileInfo.ioVRefNum;
+			dirp->file.parID = pb.hFileInfo.ioDirID;
+			dirp->file.name[0] = '\0';
+			dirp->index = 1;
+		}
+	}
+
+	return dirp;
+}
+
+/* Returns the next entry of `dirp', or NULL when the catalog lookup for
+ * the next index fails or `dirp' is NULL. The result points into `dirp'.
+ */
+struct dirent *readdir(DIR *dirp)
+{
+	if (dirp)
+	{
+		CInfoPBRec pb = { 0 };
+
+		pb.hFileInfo.ioVRefNum = dirp->file.vRefNum;
+		pb.hFileInfo.ioDirID = dirp->file.parID;
+		pb.hFileInfo.ioFDirIndex = dirp->index++;
+		pb.hFileInfo.ioNamePtr = dirp->file.name;
+
+		if (PBGetCatInfoSync(&pb) != noErr)
+			return NULL;
+
+		/* turn the length-prefixed name into a C string */
+		memcpy(dirp->ent.d_name, dirp->file.name + 1, dirp->file.name[0]);
+		dirp->ent.d_name[dirp->file.name[0]] = 0;
+		return &dirp->ent;
+	}
+	return NULL;
+}
+
+/* Releases `dirp'. Always returns 0. */
+int closedir(DIR *dirp)
+{
+	if (dirp)
+		free(dirp);
+	return 0;
+}
+
+/* Restarts enumeration of `dirp' from its first entry. */
+void rewinddir(DIR *dirp)
+{
+	if (dirp)
+		dirp->index = 1;
+}
+
+/* Fills `st' for `file' from the catalog information of the volume.
+ * Returns 0 on success and -1 when the path cannot be converted or
+ * looked up; "." is always reported as a directory.
+ */
+int mstat(const char* file, struct stat* st)
+{
+	CInfoPBRec pb;
+	unsigned char path[256];
+	int result = 0;
+
+	memset(&pb, 0, sizeof(CInfoPBRec));
+
+	if (strcmp(file, ".") == 0)
+	{
+		memset(st, 0, sizeof(struct stat));
+		st->st_mode = S_IFDIR;
+		st->st_ino = -1;
+	}
+	else
+	{
+		result = get_path(file, path);
+
+		if (result == 0)
+		{
+			pb.hFileInfo.ioNamePtr = path;
+
+			if (PBGetCatInfoSync(&pb) != noErr)
+				result = -1;
+			else
+			{
+				memset(st, 0, sizeof(struct stat));
+
+				if (pb.hFileInfo.ioFlAttrib & ioDirMask)
+					st->st_mode = S_IFDIR;
+				else
+					st->st_mode = S_IFREG;
+
+				st->st_ino = pb.hFileInfo.ioFlStBlk;
+				st->st_dev = pb.hFileInfo.ioVRefNum;
+				st->st_nlink = 1;
+				st->st_size = pb.hFileInfo.ioFlLgLen;
+				st->st_atime = pb.hFileInfo.ioFlMdDat;
+				st->st_mtime = pb.hFileInfo.ioFlMdDat;
+				st->st_ctime = pb.hFileInfo.ioFlCrDat;
+			}
+		}
+	}
+
+	return result;
+}
+
+#undef fopen
+
+/* fopen() replacement that accepts a slash-separated path. Returns NULL
+ * when the path cannot be converted.
+ */
+FILE* mfopen(const char* file, const char* mode)
+{
+	unsigned char path[256];
+
+	if (get_path(file, path) == 0)
+	{
+		/* drop the length byte to obtain a C string */
+		int l = path[0];
+		memmove(path, path + 1, l);
+		path[l] = 0;
+		return fopen((char*)path, mode);
+	}
+	else
+		return NULL;
+}
+
+/* Writes the current directory as a slash-separated path to `out_path',
+ * allocating a buffer of at least PATH_MAX bytes when `out_path' is NULL.
+ * Returns the buffer, or NULL when that allocation fails. The path "."
+ * is stored when the full path does not fit the internal scratch buffer.
+ * NOTE(review): `out_path_len' is not checked against the length of the
+ * result when the caller supplies the buffer -- confirm callers always
+ * pass at least PATH_MAX bytes.
+ */
+char* getcwd(char* out_path, int out_path_len)
+{
+	OSErr err = noErr;
+	CInfoPBRec pb;
+	FSSpec cwd;
+
+	if (out_path == NULL)
+	{
+		if (out_path_len < PATH_MAX)
+			out_path_len = PATH_MAX;
+		out_path = (char*)malloc(out_path_len);
+		if (out_path == NULL)
+			return NULL;
+	}
+
+	err = FSMakeFSSpec(0, 0, "\p:", &cwd);
+
+	if (cwd.parID == fsRtParID)
+	{
+		*out_path = '/';
+		memcpy(out_path + 1, cwd.name + 1, cwd.name[0]);
+		out_path[1 + cwd.name[0]] = 0;
+	}
+	else
+	{
+		/* The object isn't a volume */
+
+		/* Is the object a file or a directory? */
+
+		char t[PATH_MAX];
+		char* s;
+
+		/* the path is assembled backwards from the end of `t' */
+		s = t + PATH_MAX - cwd.name[0] - 1;
+		memcpy(s, cwd.name + 1, cwd.name[0]);
+		s[cwd.name[0]] = 0;
+
+		/* Get the ancestor directory names */
+		pb.dirInfo.ioNamePtr = cwd.name;
+		pb.dirInfo.ioVRefNum = cwd.vRefNum;
+		pb.dirInfo.ioDrParID = cwd.parID;
+		do	/* loop until we have an error or find the root directory */
+		{
+			pb.dirInfo.ioFDirIndex = -1;
+			pb.dirInfo.ioDrDirID = pb.dirInfo.ioDrParID;
+			err = PBGetCatInfoSync(&pb);
+			if ( err == noErr )
+			{
+				/* never write in front of `t': give up and report "."
+				 * when the next component and its slash do not fit */
+				if (s - t < (long) cwd.name[0] + 2)
+				{
+					s = t;
+					break;
+				}
+				*--s = '/';
+				s -= cwd.name[0];
+				memcpy(s, cwd.name + 1, cwd.name[0]);
+			}
+		}
+		while (err == noErr && pb.dirInfo.ioDrDirID != fsRtDirID && s > t + 1);
+
+		if (s > t + 1)
+		{
+			*--s = '/';
+			strcpy(out_path, s);
+		}
+		else
+			strcpy(out_path, ".");
+	}
+
+	return out_path;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/magic.diff b/magic.diff
new file mode 100644
index 0000000..0457756
--- /dev/null
+++ b/magic.diff
@@ -0,0 +1,21 @@
+This file contains a patch to the Linux file /usr/share/magic which will
+allow the "file" command to properly identify tags file produced by
+Exuberant Ctags.
To apply the patch, issue the following command as root:
+
+	patch -p0 < magic.diff
+
+*** /usr/share/magic.orig Wed Feb 16 19:04:09 2000
+--- /usr/share/magic Mon Aug 14 20:27:01 2000
+***************
+*** 1155,1160 ****
+--- 1155,1164 ----
+ >84 belong&0x18000000 =0x18000000 undefined fpmode
+
+ #------------------------------------------------------------------------------
++ # ctags: file (1) magic for Exuberant Ctags files
++ 0 string !_TAG Exuberant Ctags tag file
++
++ #------------------------------------------------------------------------------
+ # database: file(1) magic for various databases
+ #
+ # extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk)
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..79948fe
--- /dev/null
+++ b/main.c
@@ -0,0 +1,579 @@
+/*
+*   $Id: main.c 536 2007-06-02 06:09:00Z elliotth $
+*
+*   Copyright (c) 1996-2003, Darren Hiebert
+*
+*   Author: Darren Hiebert
+*           http://ctags.sourceforge.net
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License. It is provided on an as-is basis and no
+*   responsibility is accepted for its failure to perform as expected.
+*
+*   This is a reimplementation of the ctags (1) program. It is an attempt to
+*   provide a fully featured ctags program which is free of the limitations
+*   which most (all?) others are subject to.
+*
+*   This module contains the start-up code and routines to determine the list
+*   of files to be parsed for tags.
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+#include <string.h>
+
+/*  To provide timings features if available.
+ */
+#ifdef HAVE_CLOCK
+# ifdef HAVE_TIME_H
+#  include <time.h>
+# endif
+#else
+# ifdef HAVE_TIMES
+#  ifdef HAVE_SYS_TIMES_H
+#   include <sys/times.h>
+#  endif
+# endif
+#endif
+
+/*  To provide directory searching for recursion feature.
+ */
+#ifdef AMIGA
+# include <dos/dosasl.h>       /* for struct AnchorPath */
+# include <clib/dos_protos.h>  /* function prototypes */
+# define ANCHOR_BUF_SIZE 512
+# define ANCHOR_SIZE (sizeof (struct AnchorPath) + ANCHOR_BUF_SIZE)
+# ifdef __SASC
+   extern struct DosLibrary *DOSBase;
+#  include <pragmas/dos_pragmas.h>
+# endif
+#endif
+
+#ifdef HAVE_DIRENT_H
+# ifdef __BORLANDC__
+#  define boolean BORLAND_boolean
+# endif
+# ifdef HAVE_SYS_TYPES_H
+#  include <sys/types.h>  /* required by dirent.h */
+# endif
+# include <dirent.h>  /* to declare opendir() */
+# undef boolean
+#endif
+#ifdef HAVE_DIRECT_H
+# include <direct.h>  /* to _getcwd() */
+#endif
+#ifdef HAVE_DOS_H
+# include <dos.h>  /* to declare FA_DIREC */
+#endif
+#ifdef HAVE_DIR_H
+# include <dir.h>  /* to declare findfirst() and findnext */
+#endif
+#ifdef HAVE_IO_H
+# include <io.h>  /* to declare _findfirst() */
+#endif
+
+
+#include "debug.h"
+#include "keyword.h"
+#include "main.h"
+#include "options.h"
+#include "read.h"
+#include "routines.h"
+
+/*
+*   MACROS
+*/
+#define plural(value)  (((unsigned long)(value) == 1L) ? "" : "s")
+
+/*
+*   DATA DEFINITIONS
+*/
+static struct { long files, lines, bytes; } Totals = { 0, 0, 0 };
+
+#ifdef AMIGA
+# include "ctags.h"
+  static const char *VERsion = "$VER: "PROGRAM_NAME" "PROGRAM_VERSION" "
+# ifdef __SASC
+  __AMIGADATE__
+# else
+  __DATE__
+# endif
+  " "AUTHOR_NAME" $";
+#endif
+
+/*
+*   FUNCTION PROTOTYPES
+*/
+static boolean createTagsForEntry (const char *const entryName);
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/* Adds the given counts to the running totals. */
+extern void addTotals (
+		const unsigned int files, const long unsigned int lines,
+		const long unsigned int bytes)
+{
+	Totals.files += files;
+	Totals.lines += lines;
+	Totals.bytes += bytes;
+}
+
+/* Returns TRUE when tags go to standard output: in xref or filter mode,
+ * or when the tag file name designates standard output.
+ */
+extern boolean isDestinationStdout (void)
+{
+	boolean toStdout = FALSE;
+
+	if (Option.xref || Option.filter ||
+		(Option.tagFileName != NULL && (strcmp (Option.tagFileName, "-") == 0
+#if defined (VMS)
+	|| strcmp (Option.tagFileName, "sys$output") == 0
+#else
+	|| strcmp (Option.tagFileName, "/dev/stdout") == 0
+#endif
+		)))
+		toStdout = TRUE;
+	return toStdout;
+}
+
+#if defined (HAVE_OPENDIR)
+/* Creates tags for every entry of `dirName' except "." and "..". */
+static boolean recurseUsingOpendir (const char *const dirName)
+{
+	boolean resize = FALSE;
+	DIR *const dir = opendir (dirName);
+	if (dir == NULL)
+		error (WARNING | PERROR, "cannot recurse into directory \"%s\"", dirName);
+	else
+	{
+		struct dirent *entry;
+		while ((entry = readdir (dir)) != NULL)
+		{
+			if (strcmp (entry->d_name, ".") != 0 &&
+				strcmp (entry->d_name, "..") != 0)
+			{
+				vString *filePath;
+				if (strcmp (dirName, ".") == 0)
+					filePath = vStringNewInit (entry->d_name);
+				else
+					filePath = combinePathAndFile (dirName, entry->d_name);
+				resize |= createTagsForEntry (vStringValue (filePath));
+				vStringDelete (filePath);
+			}
+		}
+		closedir (dir);
+	}
+	return resize;
+}
+
+#elif defined (HAVE_FINDFIRST) || defined (HAVE__FINDFIRST)
+
+/* Creates tags for `entryName', prefixed with the first `dirLength'
+ * characters of `pattern'.
+ */
+static boolean createTagsForWildcardEntry (
+		const char *const pattern, const size_t dirLength,
+		const char *const entryName)
+{
+	boolean resize = FALSE;
+	/* we must not recurse into the directories "." or ".." */
+	if (strcmp (entryName, ".") != 0 && strcmp (entryName, "..") != 0)
+	{
+		vString *const filePath = vStringNew ();
+		vStringNCopyS (filePath, pattern, dirLength);
+		vStringCatS (filePath, entryName);
+		resize = createTagsForEntry (vStringValue (filePath));
+		vStringDelete (filePath);
+	}
+	return resize;
+}
+
+/* Creates tags for every entry that findfirst() or _findfirst() reports
+ * for `pattern'.
+ */
+static boolean createTagsForWildcardUsingFindfirst (const char *const pattern)
+{
+	boolean resize = FALSE;
+	const size_t dirLength = baseFilename (pattern) - pattern;
+#if defined (HAVE_FINDFIRST)
+	struct ffblk fileInfo;
+	int result = findfirst (pattern, &fileInfo, FA_DIREC);
+	while (result == 0)
+	{
+		const char *const entry = (const char *) fileInfo.ff_name;
+		resize |= createTagsForWildcardEntry (pattern, dirLength, entry);
+		result = findnext (&fileInfo);
+	}
+#elif defined (HAVE__FINDFIRST)
+	struct _finddata_t fileInfo;
+	findfirst_t hFile = _findfirst (pattern, &fileInfo);
+	if (hFile != -1L)
+	{
+		do
+		{
+			const char *const entry = (const char *) fileInfo.name;
+			resize |= createTagsForWildcardEntry (pattern, dirLength, entry);
+		} while (_findnext (hFile, &fileInfo) == 0);
+		_findclose (hFile);
+	}
+#endif
+	return resize;
+}
+
+#elif defined (AMIGA)
+
+/* Creates tags for every entry that MatchFirst()/MatchNext() report for
+ * `pattern'.
+ */
+static boolean createTagsForAmigaWildcard (const char *const pattern)
+{
+	boolean resize = FALSE;
+	struct AnchorPath *const anchor =
+			(struct AnchorPath *) eMalloc ((size_t) ANCHOR_SIZE);
+	LONG result;
+
+	memset (anchor, 0, (size_t) ANCHOR_SIZE);
+	anchor->ap_Strlen = ANCHOR_BUF_SIZE;
+	/* Allow '.' for current directory */
+#ifdef APF_DODOT
+	anchor->ap_Flags = APF_DODOT | APF_DOWILD;
+#else
+	anchor->ap_Flags = APF_DoDot | APF_DoWild;
+#endif
+	result = MatchFirst ((UBYTE *) pattern, anchor);
+	while (result == 0)
+	{
+		resize |= createTagsForEntry ((char *) anchor->ap_Buf);
+		result = MatchNext (anchor);
+	}
+	MatchEnd (anchor);
+	eFree (anchor);
+	return resize;
+}
+#endif
+
+/* Creates tags for the contents of `dirName' using whichever directory
+ * interface the platform offers. Recursive links are ignored, and so is
+ * the directory itself unless recursion was requested.
+ */
+static boolean recurseIntoDirectory (const char *const dirName)
+{
+	boolean resize = FALSE;
+	if (isRecursiveLink (dirName))
+		verbose ("ignoring \"%s\" (recursive link)\n", dirName);
+	else if (! Option.recurse)
+		verbose ("ignoring \"%s\" (directory)\n", dirName);
+	else
+	{
+		verbose ("RECURSING into directory \"%s\"\n", dirName);
+#if defined (HAVE_OPENDIR)
+		resize = recurseUsingOpendir (dirName);
+#elif defined (HAVE_FINDFIRST) || defined (HAVE__FINDFIRST)
+		{
+			vString *const pattern = vStringNew ();
+			vStringCopyS (pattern, dirName);
+			vStringPut (pattern, OUTPUT_PATH_SEPARATOR);
+			vStringCatS (pattern, "*.*");
+			resize = createTagsForWildcardUsingFindfirst (vStringValue (pattern));
+			vStringDelete (pattern);
+		}
+#elif defined (AMIGA)
+		{
+			vString *const pattern = vStringNew ();
+			if (*dirName != '\0' && strcmp (dirName, ".") != 0)
+			{
+				vStringCopyS (pattern, dirName);
+				if (dirName [strlen (dirName) - 1] != '/')
+					vStringPut (pattern, '/');
+			}
+			vStringCatS (pattern, "#?");
+			resize = createTagsForAmigaWildcard (vStringValue (pattern));
+			vStringDelete (pattern);
+		}
+#endif
+	}
+	return resize;
+}
+
+/* Creates tags for one file system entry: parses normal files, recurses
+ * into directories, and reports or skips everything else.
+ */
+static boolean createTagsForEntry (const char *const entryName)
+{
+	boolean resize = FALSE;
+	fileStatus *status;
+
+	/* check the argument before it is first used */
+	Assert (entryName != NULL);
+	status = eStat (entryName);
+	if (isExcludedFile (entryName))
+		verbose ("excluding \"%s\"\n", entryName);
+	else if (status->isSymbolicLink && ! Option.followLinks)
+		verbose ("ignoring \"%s\" (symbolic link)\n", entryName);
+	else if (! status->exists)
+		error (WARNING | PERROR, "cannot open source file \"%s\"", entryName);
+	else if (status->isDirectory)
+		resize = recurseIntoDirectory (entryName);
+	else if (! status->isNormalFile)
+		verbose ("ignoring \"%s\" (special file)\n", entryName);
+	else
+		resize = parseFile (entryName);
+
+	eStatFree (status);
+	return resize;
+}
+
+#ifdef MANUAL_GLOBBING
+
+/* Expands the wildcard argument `arg' itself, for platforms on which the
+ * command line is not expanded before the program sees it.
+ */
+static boolean createTagsForWildcardArg (const char *const arg)
+{
+	boolean resize = FALSE;
+	vString *const pattern = vStringNewInit (arg);
+
+#if defined (HAVE_FINDFIRST) || defined (HAVE__FINDFIRST)
+	/*  We must transform the "." and ".." forms into something that can
+	 *  be expanded by the findfirst/_findfirst functions.
+	 */
+	if (Option.recurse &&
+		(strcmp (arg, ".") == 0 || strcmp (arg, "..") == 0))
+	{
+		vStringPut (pattern, OUTPUT_PATH_SEPARATOR);
+		vStringCatS (pattern, "*.*");
+	}
+	/* fetch the buffer only now: appending above may have moved it */
+	resize |= createTagsForWildcardUsingFindfirst (vStringValue (pattern));
+#endif
+	vStringDelete (pattern);
+	return resize;
+}
+
+#endif
+
+/* Creates tags for each remaining command line argument, processing any
+ * options that follow an argument before moving on to the next one.
+ */
+static boolean createTagsForArgs (cookedArgs *const args)
+{
+	boolean resize = FALSE;
+
+	/*  Generate tags for each argument on the command line.
+	 */
+	while (! cArgOff (args))
+	{
+		const char *const arg = cArgItem (args);
+
+#ifdef MANUAL_GLOBBING
+		resize |= createTagsForWildcardArg (arg);
+#else
+		resize |= createTagsForEntry (arg);
+#endif
+		cArgForth (args);
+		parseOptions (args);
+	}
+	return resize;
+}
+
+/*  Read from an opened file a list of file names for which to generate tags.
+ */
+static boolean createTagsFromFileInput (FILE *const fp, const boolean filter)
+{
+	boolean resize = FALSE;
+	if (fp != NULL)
+	{
+		cookedArgs *args = cArgNewFromLineFile (fp);
+		parseOptions (args);
+		while (! cArgOff (args))
+		{
+			resize |= createTagsForEntry (cArgItem (args));
+			if (filter)
+			{
+				/* in filter mode, mark the end of each file's tags */
+				if (Option.filterTerminator != NULL)
+					fputs (Option.filterTerminator, stdout);
+				fflush (stdout);
+			}
+			cArgForth (args);
+			parseOptions (args);
+		}
+		cArgDelete (args);
+	}
+	return resize;
+}
+
+/*  Read from a named file a list of file names for which to generate tags.
+ */ +static boolean createTagsFromListFile (const char *const fileName) +{ + boolean resize; + Assert (fileName != NULL); + if (strcmp (fileName, "-") == 0) /* a lone dash selects standard input */ + resize = createTagsFromFileInput (stdin, FALSE); + else + { + FILE *const fp = fopen (fileName, "r"); + if (fp == NULL) + error (FATAL | PERROR, "cannot open list file \"%s\"", fileName); /* NOTE(review): assumes a FATAL error does not return; otherwise fclose below receives NULL */ + resize = createTagsFromFileInput (fp, FALSE); + fclose (fp); + } + return resize; +} +/* Timing support: use the library clock when HAVE_CLOCK is set, else emulate it with times (user plus system time), else stub clock out to zero. CLOCK_AVAILABLE is defined only in the first two cases. */ +#if defined (HAVE_CLOCK) +# define CLOCK_AVAILABLE +# ifndef CLOCKS_PER_SEC +# define CLOCKS_PER_SEC 1000000 +# endif +#elif defined (HAVE_TIMES) +# define CLOCK_AVAILABLE +# define CLOCKS_PER_SEC 60 +static clock_t clock (void) +{ + struct tms buf; + + times (&buf); + return (buf.tms_utime + buf.tms_stime); +} +#else +# define clock() (clock_t)0 +#endif +/* Writes the scan, add and sort statistics to errout. timeStamps must hold three clock readings: elements 0 and 1 bracket the scanning phase and elements 1 and 2 bracket the phase reported as sorting. */ +static void printTotals (const clock_t *const timeStamps) +{ + const unsigned long totalTags = TagFile.numTags.added + + TagFile.numTags.prev; + + fprintf (errout, "%ld file%s, %ld line%s (%ld kB) scanned", + Totals.files, plural (Totals.files), + Totals.lines, plural (Totals.lines), + Totals.bytes/1024L); +#ifdef CLOCK_AVAILABLE + { + const double interval = ((double) (timeStamps [1] - timeStamps [0])) / + CLOCKS_PER_SEC; + + fprintf (errout, " in %.01f seconds", interval); + if (interval != (double) 0.0) /* avoid dividing by a zero interval */ + fprintf (errout, " (%lu kB/s)", + (unsigned long) (Totals.bytes / interval) / 1024L); + } +#endif + fputc ('\n', errout); + + fprintf (errout, "%lu tag%s added to tag file", + TagFile.numTags.added, plural (TagFile.numTags.added)); + if (Option.append) + fprintf (errout, " (now %lu tags)", totalTags); + fputc ('\n', errout); + + if (totalTags > 0 && Option.sorted != SO_UNSORTED) + { + fprintf (errout, "%lu tag%s sorted", totalTags, plural (totalTags)); +#ifdef CLOCK_AVAILABLE + fprintf (errout, " in %.02f seconds", + ((double) (timeStamps [2] - timeStamps [1])) / CLOCKS_PER_SEC); +#endif + fputc ('\n', errout); + } + +#ifdef DEBUG + fprintf (errout, "longest tag line = %lu\n",
(unsigned long) TagFile.max.line); +#endif +} +/* Reports whether Option.etags is set together with a non-NULL Option.etagsInclude. */ +static boolean etagsInclude (void) +{ + return (boolean)(Option.etags && Option.etagsInclude != NULL); +} +/* Top-level driver. Opens the tag file unless filtering, then tags the command-line items, the list file and the filter input in that order, falls back to recursing from the current directory when nothing was named and Option.recurse is set, closes the tag file and optionally prints totals. */ +static void makeTags (cookedArgs *args) +{ + clock_t timeStamps [3]; + boolean resize = FALSE; + boolean files = (boolean)(! cArgOff (args) || Option.fileList != NULL + || Option.filter); + + if (! files) + { + if (filesRequired ()) + error (FATAL, "No files specified. Try \"%s --help\".", + getExecutableName ()); + else if (! Option.recurse && ! etagsInclude ()) + return; + } + +#define timeStamp(n) timeStamps[(n)]=(Option.printTotals ? clock():(clock_t)0) /* the clock is read only when totals will be printed */ + if (! Option.filter) + openTagFile (); + + timeStamp (0); + + if (! cArgOff (args)) + { + verbose ("Reading command line arguments\n"); + resize = createTagsForArgs (args); + } + if (Option.fileList != NULL) + { + verbose ("Reading list file\n"); + resize = (boolean) (createTagsFromListFile (Option.fileList) || resize); + } + if (Option.filter) + { + verbose ("Reading filter input\n"); + resize = (boolean) (createTagsFromFileInput (stdin, TRUE) || resize); + } + if (! files && Option.recurse) /* no input was named: recurse from the current directory */ + resize = recurseIntoDirectory ("."); + + timeStamp (1); + + if (!
Option.filter) + closeTagFile (resize); + + timeStamp (2); + + if (Option.printTotals) + printTotals (timeStamps); +#undef timeStamp +} + +/* + * Start up code: performs the platform-specific argument fixups, parses the options, runs makeTags, then frees the module resources and exits with status 0. + */ + +extern int main (int __unused__ argc, char **argv) +{ + cookedArgs *args; +#ifdef VMS + extern int getredirection (int *ac, char ***av); + + /* do wildcard expansion and I/O redirection */ + getredirection (&argc, &argv); +#endif + +#ifdef AMIGA + /* This program doesn't work when started from the Workbench */ + if (argc == 0) + exit (1); +#endif + +#ifdef __EMX__ + _wildcard (&argc, &argv); /* expand wildcards in argument list */ +#endif + +#if defined (macintosh) && BUILD_MPW_TOOL == 0 + argc = ccommand (&argv); +#endif + + setCurrentDirectory (); + setExecutableName (*argv++); /* argv now points at the first argument after the program name */ + checkRegex (); + + args = cArgNewFromArgv (argv); + previewFirstOption (args); + testEtagsInvocation (); + initializeParsing (); + initOptions (); + readOptionConfiguration (); + verbose ("Reading initial options from command line\n"); + parseOptions (args); + checkOptions (); + makeTags (args); + + /* Clean up. + */ + cArgDelete (args); + freeKeywordTable (); + freeRoutineResources (); + freeSourceFileResources (); + freeTagFileResources (); + freeOptionResources (); + freeParserResources (); + freeRegexResources (); + + exit (0); /* exit does not return, so the return below is not reached */ + return 0; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/main.h b/main.h new file mode 100644 index 0000000..ad9a8e6 --- /dev/null +++ b/main.h @@ -0,0 +1,32 @@ +/* +* $Id: main.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License.
+* +* External interface to main.c +*/ +#ifndef _MAIN_H +#define _MAIN_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <stdio.h> + +#include "vstring.h" + +/* +* FUNCTION PROTOTYPES +*/ +extern void addTotals (const unsigned int files, const long unsigned int lines, const long unsigned int bytes); +extern boolean isDestinationStdout (void); +extern int main (int argc, char **argv); + +#endif /* _MAIN_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/maintainer.mak b/maintainer.mak new file mode 100644 index 0000000..6c76c2c --- /dev/null +++ b/maintainer.mak @@ -0,0 +1,476 @@ +# $Id: maintainer.mak 722 2009-07-09 16:10:35Z dhiebert $ +# +# Copyright (c) 1996-2009, Darren Hiebert +# +# Development makefile for Exuberant Ctags. Also used to build releases. +# Requires GNU make. + +OBJEXT := o + +include source.mak + +DSOURCES := $(SOURCES) debug.c + +VERSION_FILES:= ctags.h ctags.1 NEWS + +LIB_FILES := readtags.c readtags.h + +ENVIRONMENT_MAKEFILES := \ + mk_bc3.mak mk_bc5.mak mk_djg.mak mk_manx.mak mk_mingw.mak \ + mk_mpw.mak mk_mvc.mak mk_os2.mak mk_qdos.mak mk_sas.mak \ + +COMMON_FILES := COPYING EXTENDING.html FAQ INSTALL.oth MAINTAINERS NEWS README \ + $(ENVIRONMENT_MAKEFILES) source.mak \ + $(DSOURCES) $(HEADERS) $(LIB_FILES) \ + $(ENVIRONMENT_SOURCES) $(ENVIRONMENT_HEADERS) + +UNIX_FILES := $(COMMON_FILES) \ + .indent.pro INSTALL configure.ac \ + Makefile.in maintainer.mak \ + descrip.mms mkinstalldirs magic.diff \ + ctags.spec ctags.1 + +REGEX_DIR := gnu_regex + +WIN_FILES := $(COMMON_FILES) $(VERSION_FILES) + +SVN_FILES := $(UNIX_FILES) + +DEP_DIR := .deps + +OBJECTS := $(patsubst %.c,%.o,$(notdir $(SOURCES))) +DOBJECTS := $(patsubst %.c,%.od,$(notdir $(DSOURCES))) +DEPS := $(patsubst %.c,$(DEP_DIR)/%.d,$(notdir $(SOURCES))) + +WARNINGS := -Wall -W -Wpointer-arith -Wcast-align -Wwrite-strings \ + -Wmissing-prototypes -Wmissing-declarations \ + -Wnested-externs -Wcast-qual -Wshadow -pedantic \ + -Wstrict-prototypes \
+ # -Wtraditional -Wconversion -Werror + +PRODUCER := Darren B. Hiebert +EMAIL := dhiebert@users.sourceforge.net +CTAGS_WEBSITE := http://ctags.sourceforge.net +RPM_ROOT := rpms +RPM_ABS_ROOT := $(PWD)/$(RPM_ROOT) +WINDOWS_DIR := win32 +RELEASE_DIR := releases +CTAGS_WEBDIR := website +win_version = $(subst .,,$(version)) +HOST_ARCH := $(shell uname -p) + +ifneq ($(findstring $(HOST_ARCH),i386 i686),) +COMP_ARCH := -march=i686 +endif + +CC := gcc +INCLUDE := -I. +DEFS := -DHAVE_CONFIG_H +COMP_FLAGS := $(INCLUDE) $(DEFS) $(CFLAGS) +PROF_OPT := -O3 $(COMP_ARCH) +#OPT := $(PROF_OPT) -fomit-frame-pointer +OPT := $(PROF_OPT) +DCFLAGS := $(COMP_FLAGS) -DDEBUG -DINTERNAL_SORT +LD := gcc +LDFLAGS := +RPM_FLAGS := -O3 $(COMP_ARCH) + +AUTO_GEN := configure config.h.in +CONFIG_GEN := config.cache config.log config.status config.run config.h Makefile +PROF_GEN := gmon.out +COV_GEN := *.da *.gcov + +UNIX2DOS := perl -pe 's/(\r\n|\n|\r)/\r\n/g' +MAN2HTML := tbl | groff -Wall -mtty-char -mandoc -Thtml -c + +# +# Targets +# +default: all +ifeq ($(findstring clean,$(MAKECMDGOALS)),) +ifeq ($(wildcard config.h),) +ctags dctags ctags.prof ctags.cov: + $(MAKE) config.h + $(MAKE) $(MAKECMDGOALS) +else +all: dctags tags syntax.vim + +-include $(DEPS) $(DEP_DIR)/readtags.d + +# +# Executable targets +# +ctags: $(OBJECTS) + @ echo "-- Linking $@" + @ $(LD) -o $@ $(LDFLAGS) $^ + +dctags: $(DOBJECTS) debug.od + @ echo "-- Building $@" + $(LD) -o $@ $(LDFLAGS) $^ + +ctags.prof: $(SOURCES) $(HEADERS) Makefile + $(CC) -pg $(PROF_OPT) $(COMP_FLAGS) $(WARNINGS) $(SOURCES) -o $@ + +ctags.cov: $(SOURCES) $(HEADERS) Makefile + $(CC) -fprofile-arcs -ftest-coverage $(COMP_FLAGS) $(WARNINGS) $(SOURCES) -o $@ + +gcov: $(SOURCES:.c=.c.gcov) + +readtags: readtags.[ch] + $(CC) -g $(COMP_FLAGS) -DDEBUG -DREADTAGS_MAIN -o $@ readtags.c + +readtags.o: readtags.c readtags.h + $(CC) $(COMP_FLAGS) -c readtags.c + +etyperef: etyperef.o keyword.o routines.o strlist.o vstring.o + $(CC) -o $@ $^ + +etyperef.o: 
eiffel.c + $(CC) -DTYPE_REFERENCE_TOOL $(OPT) $(COMP_FLAGS) -o $@ -c $< + +endif +endif + +# +# Support targets +# +FORCE: + +config.h.in: configure.ac + autoheader + @ touch $@ + +configure: configure.ac + autoconf + +config.status: configure + ./config.status --recheck + +config.h: config.h.in config.status + ./config.status + touch $@ + +depclean: + rm -f $(DEPS) + +profclean: + rm -f $(PROF_GEN) + +gcovclean: + rm -f $(COV_GEN) + +clean: depclean profclean gcovclean clean-test + rm -f *.[ois] *.o[dm] ctags dctags ctags*.exe readtags etyperef \ + ctags.man ctags.html ctags.prof ctags.cov *.bb *.bbg tags TAGS syntax.vim + +distclean: clean + rm -f $(CONFIG_GEN) + +maintainer-clean maintclean: distclean + rm -f $(AUTO_GEN) + +%.man: %.1 Makefile + tbl $< | groff -Wall -mtty-char -mandoc -Tascii -c | sed 's/.//g' > $@ + +%.html: %.1 Makefile + cat $< | $(MAN2HTML) > $@ + +tags: $(DSOURCES) $(HEADERS) $(LIB_FILES) Makefile *.mak + @ echo "-- Building tag file" + @ ctags * + +# +# Create a Vim syntax file for all typedefs +# +syntax: syntax.vim +syntax.vim: $(DSOURCES) $(HEADERS) $(LIB_FILES) + @ echo "-- Generating syntax file" + @ ctags --c-types=cgstu --file-scope -o- $^ |\ + awk '{print $$1}' | sort -u | fmt |\ + awk '{printf("syntax keyword Typedef\t%s\n", $$0)}' > $@ + +# +# Testing +# +-include testing.mak + +# +# Help +# +help: + @ echo "Major targets:" + @ echo "default : Build dctags, with debugging support" + @ echo "ctags : Build optimized binary" + @ echo "help-release: Print help on releasing ctags" + +# +# Release management +# + +help-release: + @ echo "1. make release-svn-X.Y" + @ echo "2. make release-source-X.Y" + @ echo "3. move ctags-X.Y.tar.gz to Linux" + @ echo "4. On Linux: Extract tar; make -f maintainer.mak release-rpm-X.Y" + @ echo "5. On Windows: cd $(WINDOWS_DIR)/winXY; nmake -f mk_mvc.mak ctags.exe mostlyclean" + @ echo "6. make version=X.Y win-zip" + @ echo "7. 
make website-X.Y" + +.SECONDARY: + +RPM_ARCH := i386 +RPM_SUBDIRS := BUILD SOURCES SPECS SRPMS RPMS +RPM_DIRS := $(addprefix $(RPM_ROOT)/,$(RPM_SUBDIRS)) + +$(RELEASE_DIR)/ctags-%-1.$(RPM_ARCH).rpm: \ + $(RPM_ROOT)/RPMS/$(RPM_ARCH)/ctags-%-1.$(RPM_ARCH).rpm \ + | $(RELEASE_DIR) + ln -f $< $@ + chmod 644 $@ + +$(RELEASE_DIR)/ctags-%-1.src.rpm: \ + $(RPM_ROOT)/SRPMS/ctags-%-1.src.rpm \ + | $(RELEASE_DIR) + ln -f $< $@ + chmod 644 $@ + +$(eval $(RPM_DIRS) $(RELEASE_DIR): ; mkdir -p $$@) + +$(RPM_ROOT)/SRPMS/ctags-%-1.src.rpm \ +$(RPM_ROOT)/RPMS/$(RPM_ARCH)/ctags-%-1.$(RPM_ARCH).rpm: \ + $(RPM_ROOT)/SOURCES/ctags-%.tar.gz \ + $(RPM_ROOT)/SPECS/ctags-%.spec \ + | $(RPM_DIRS) + rpmbuild --define '_topdir $(RPM_ABS_ROOT)' --define 'optflags $(RPM_FLAGS)' --define 'packager $(PRODUCER) $(CTAGS_WEBSITE)' -ba $(RPM_ROOT)/SPECS/ctags-$*.spec + rm -fr $(RPM_ROOT)/BUILD/ctags-$* + +$(RPM_ROOT)/rpmrc: rpmmacros maintainer.mak + echo "optflags: $(RPM_ARCH) $(RPM_FLAGS)" > $@ + echo "macrofiles: $(PWD)/rpmmacros" >> $@ + +$(RPM_ROOT)/rpmmacros: maintainer.mak + echo "%_topdir $(RPM_ABS_ROOT)" > $@ + echo '%_gpg_name "$(PRODUCER) <$(EMAIL)>"' >> $@ + echo "%packager $(PRODUCER) $(CTAGS_WEBSITE)" >> $@ + echo "%_i18ndomains %{nil}" >> $@ + echo "%debug_package %{nil}" >> $@ + +$(RPM_ROOT)/SPECS/ctags-%.spec: ctags.spec | $(RPM_ROOT)/SPECS + sed -e "s/@VERSION@/$*/" ctags.spec > $(RPM_ROOT)/SPECS/ctags-$*.spec + +$(RPM_ROOT)/SOURCES/ctags-%.tar.gz: $(RELEASE_DIR)/ctags-%.tar.gz | $(RPM_ROOT)/SOURCES + ln -f $< $@ + +$(RELEASE_DIR)/ctags-%.tar.gz: $(UNIX_FILES) | $(RELEASE_DIR) + @ echo "---------- Building tar ball" + if [ -d $(@D)/dirs/ctags-$* ]; then rm -fr $(@D)/dirs/ctags-$*; fi + mkdir -p $(@D)/dirs/ctags-$* + cp -pr $(UNIX_FILES) $(REGEX_DIR) $(@D)/dirs/ctags-$*/ + sed -e 's/\(PROGRAM_VERSION\) "\([^ ]*\)"/\1 "$*"/' ctags.h > $(@D)/dirs/ctags-$*/ctags.h + sed -e 's/"\(Version\) \([^ ]*\)"/"\1 $*"/' ctags.1 > $(@D)/dirs/ctags-$*/ctags.1 + sed -e 's/\(Current Version:\) [^ ]*/\1 
$*/' -e 's/@VERSION@/$*/' -e "s/@DATE@/`date +'%d %b %Y'`/" NEWS > $(@D)/dirs/ctags-$*/NEWS + (cd $(@D)/dirs/ctags-$* ;\ + chmod 644 * ;\ + chmod 755 mkinstalldirs ;\ + chmod 755 $(REGEX_DIR) ;\ + chmod 644 $(REGEX_DIR)/* ;\ + autoheader ;\ + chmod 644 config.h.in ;\ + autoconf ;\ + chmod 755 configure ;\ + rm -fr autom4te.cache ;\ + cat ctags.1 | $(MAN2HTML) > ctags.html ;\ + ) + cd $(@D)/dirs && tar -zcf ../$(@F) ctags-$* + chmod 644 $@ + +clean-rpm: + rm -fr $(RPM_ROOT) + +ifneq ($(findstring win-,$(MAKECMDGOALS)),) +ifeq ($(version),,) +$(error $(MAKECMDGOALS) target requires value for 'version') +endif +endif + +check-version-%: + @ if [ -z "$(version)" ]; then echo "target requires value for 'version'" >&2; exit 1; fi + +$(WINDOWS_DIR)/ctags$(win_version): \ + $(RELEASE_DIR)/ctags-$(version).tar.gz maintainer.mak \ + | $(WINDOWS_DIR) + @ echo "---------- Building Win32 release directory" + rm -fr "$(WINDOWS_DIR)/ctags$(win_version)" + mkdir -p "$(WINDOWS_DIR)/ctags$(win_version)" + for file in $(WIN_FILES) ctags.html; do \ + $(UNIX2DOS) < "$(RELEASE_DIR)/dirs/ctags-$(version)/$${file}" > $@/$${file} ;\ + done + mkdir $@/$(REGEX_DIR) + for file in $(REGEX_DIR)/*; do \ + $(UNIX2DOS) < "$${file}" > $@/$(REGEX_DIR)/`basename $${file}` ;\ + done + chmod 644 $@/* + chmod 755 $@/$(REGEX_DIR) + chmod 644 $@/$(REGEX_DIR)/* + +$(RELEASE_DIR)/ctags%.zip: \ + check-version-% \ + $(WINDOWS_DIR)/ctags% \ + $(WINDOWS_DIR)/ctags%/ctags.exe + cd $(WINDOWS_DIR) && zip -r ../$@ ctags$* + +win-source: $(WINDOWS_DIR)/ctags$(win_version) + +win-zip: $(RELEASE_DIR)/ctags$(win_version).zip + +release-win-%: + $(MAKE) version="$*" win-source + +release-tar-%: $(RELEASE_DIR)/ctags-%.tar.gz + : + +release-rpm-%: \ + $(RELEASE_DIR)/ctags-%-1.$(RPM_ARCH).rpm \ + $(RELEASE_DIR)/ctags-%-1.src.rpm + : + +release-source-%: $(RELEASE_DIR)/ctags-%.tar.gz + $(MAKE) version="$*" win-source + +release-bin-%: release-rpm-% + $(MAKE) version="$*" win-zip + +$(WINDOWS_DIR): + mkdir -p $@ + +# +# Web 
site files +# +website-%: website-man-% website-index-% website-news-% \ + $(CTAGS_WEBDIR)/EXTENDING.html + : + +website-man-%: ctags.1 Makefile + @ echo "---------- Generating $(CTAGS_WEBDIR)/ctags.html" + umask 022 ; \ + sed -e 's/"\(Version\) \([^ ]*\)"/"\1 $*"/' ctags.1 |\ + $(MAN2HTML) > $(CTAGS_WEBDIR)/ctags.html + +website-index-%: index.html Makefile + @ echo "---------- Generating $(CTAGS_WEBDIR)/index.html" + umask 022 ; \ + sed -e "s/@VERSION@/$*/g" \ + -e "s/@DOS_VERSION@/`echo $* | sed 's/\.//g'`/g" \ + -e "s/@DATE@/`date +'%d %B %Y'`/" \ + $< > $(CTAGS_WEBDIR)/index.html + +website-news-%: NEWS maintainer.mak + @ echo "---------- Generating $(CTAGS_WEBDIR)/news.html" + umask 022 ; \ + sed -e 's/\(Current Version:\) [^ ]*/\1 $*/' \ + -e 's/@VERSION@/$*/' \ + -e "s/@DATE@/`date +'%d %b %Y'`/" \ + -e 's//\>/g' \ + -e 's@^Current Version:.*$$@Exuberant Ctags: Change Notes

Change Notes

@' \
+		-e 's@\(^ctags-.* (.*)\)$$@\1@' \
+		-e 's@^vim:.*$$@

Back to Exuberant Ctags@' \ + $< > $(CTAGS_WEBDIR)/news.html + +$(CTAGS_WEBDIR)/EXTENDING.html: EXTENDING.html + @ echo "---------- Generating $(CTAGS_WEBDIR)/EXTENDING.html" + cp $< $@ && chmod 644 $@ + +# +# SVN management +# +svn_url := https://ctags.svn.sourceforge.net/svnroot/ctags + +release-svn-%: svn-tagcheck-% + @ echo "---------- Tagging release $*" + svn copy -m'Release of ctags-$*' $(svn_url)/trunk $(svn_url)/tags/ctags-$* + +rerelease-svn-%: + @ echo "---------- Tagging release $*" + svn remove -m'Regenerating release of ctags-$*' $(svn_url)/tags/ctags-$* + svn copy -m'Release of ctags-$*' $(svn_url)/trunk $(svn_url)/tags/ctags-$* + +svn-tagcheck-%: + if svn list $(svn_url)/tags/ | grep -q 'ctags-$*/$$' >/dev/null 2>&1 ;then \ + echo "ctags-$* already exists; use rerelease-svn-$*" >&2 ;\ + exit 1 ;\ + fi + +svn-files: + @ls -1 $(SVN_FILES) + +# +# Dependency file generation +# +$(DEPS): %.c maintainer.mak + @ if [ ! -d $(DEP_DIR) ] ;then mkdir -p $(DEP_DIR) ;fi + @ $(CC) -M $(DCFLAGS) $< | sed 's/\($*\.o\)\([ :]\)/\1 $*.od $(@F)\2/g' > $@ + + +%.inc: %.c Makefile + -@ $(CC) -MM $(DCFLAGS) $< + +# +# Compilation rules +# +%.o %.od: gnu_regex/%.c +# @ echo "-- Compiling $<" + $(CC) $(CFLAGS) -D__USE_GNU -Dbool=int -Dfalse=0 -Dtrue=1 -I$(REGEX_DIR) $(OPT) -c $< + +%.o: %.c + @ echo "-- Compiling $<" + @ $(CC) $(COMP_FLAGS) -DEXTERNAL_SORT $(OPT) $(WARNINGS) -Wuninitialized -c $< + +%.od: %.c + @ echo "-- Compiling (debug) $<" + @ $(CC) -g $(DCFLAGS) $(WARNINGS) -o $*.od -c $< + +%.i: %.c FORCE + $(CC) $(DCFLAGS) $(WARNINGS) -Wuninitialized -O -E $< > $@ +# Preprocessed output with blank lines removed; the dollar sign is doubled so that sed, not make, receives the end-of-line anchor +%.ic: %.c FORCE + $(CC) $(DCFLAGS) $(WARNINGS) -Wuninitialized -O -E $< | sed '/^[ ]*$$/d' > $@ + +%.s: %.c FORCE + $(CC) $(DCFLAGS) $(WARNINGS) -S $< > $@ + +readtags.err: DCFLAGS += -DREADTAGS_MAIN + +%.err: %.c + @ $(CC) $(DCFLAGS) $(WARNINGS) -Wuninitialized -O -c $< + @ rm $*.o + +%.c.gcov: %.da + @ gcov $*.c + +%.sproto: %.c + @ genproto -s -m __ARGS $< + +%.proto: %.c + @ genproto -e -m __ARGS $< + +#
Print out the value of a variable +# From http://www.cmcrossroads.com/ubbthreads/showflat.php?Cat=0&Board=cmbasics&Number=28829 +print-%: + @echo $* = $($*) + +# Print out the expanded values of all variables +# From http://www.cmcrossroads.com/ubbthreads/showflat.php?Cat=0&Number=29581 +.PHONY: print-vars +print-vars: + @$(foreach V,$(sort $(.VARIABLES)), \ + $(if $(filter-out environment% default automatic, \ + $(origin $V)),$(warning $V=$($V)))) + +# Print out the declared values of all variables +.PHONY: print-vars-decl +print-vars-decl: + @$(foreach V,$(sort $(.VARIABLES)), \ + $(if $(filter-out environment% default automatic, \ + $(origin $V)),$(warning $V=$(value $V)))) + +# vi:ts=4 sw=4 diff --git a/make.c b/make.c new file mode 100644 index 0000000..f468b5a --- /dev/null +++ b/make.c @@ -0,0 +1,217 @@ +/* +* $Id: make.c 681 2008-10-12 22:43:00Z dhiebert $ +* +* Copyright (c) 2000-2005, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for makefiles. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <string.h> +#include <ctype.h> + +#include "options.h" +#include "parse.h" +#include "read.h" +#include "vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_MACRO +} shKind; + +static kindOption MakeKinds [] = { + { TRUE, 'm', "macro", "macros"} +}; + +/* +* FUNCTION DEFINITIONS +*/ +/* Reads the next input character. A backslash is dropped and the character after it is returned instead; when that character is a newline (a continued line) the one following the newline is returned. */ +static int nextChar (void) +{ + int c = fileGetc (); + if (c == '\\') + { + c = fileGetc (); + if (c == '\n') + c = fileGetc (); + } + return c; +} +/* Consumes input up to the end of the current line; the newline itself is pushed back for the caller. */ +static void skipLine (void) +{ + int c; + do + c = nextChar (); + while (c != EOF && c != '\n'); + if (c == '\n') + fileUngetc (c); +} +/* Returns the next character that is not white space; a newline is returned rather than skipped. */ +static int skipToNonWhite (void) +{ + int c; + do + c = nextChar (); + while (c != '\n' && isspace (c)); + return c; +} +/* True for the characters accepted in a macro name: alphanumerics plus dot, dash and underscore. */ +static boolean isIdentifier (int c) +{ + return (boolean)(c != '\0' && (isalnum (c) || strchr (".-_", c) != NULL)); +} +/* Collects the identifier that starts with first into id and pushes back the first character that does not belong to it. */ +static void readIdentifier (const int first, vString *const id) +{ + int c = first; + vStringClear (id); + while (isIdentifier (c)) + { + vStringPut (id, c); + c = nextChar (); + } + fileUngetc (c); + vStringTerminate (id); +} +/* Skips to the character that closes pair[0], honouring nesting. Gives up at the end of the line or at end of input; the latter is reported through verbose. */ +static void skipToMatch (const char *const pair) +{ + const int begin = pair [0], end = pair [1]; + const unsigned long inputLineNumber = getInputLineNumber (); + int matchLevel = 1; + int c = '\0'; + + while (matchLevel > 0) + { + c = nextChar (); + if (c == begin) + ++matchLevel; + else if (c == end) + --matchLevel; + else if (c == '\n' || c == EOF) /* stop at end of input too, otherwise this loop never terminates */ + break; + } + if (c == EOF) + verbose ("%s: failed to find match for '%c' at line %lu\n", + getInputFileName (), begin, inputLineNumber); +} +/* Scans the makefile being read and emits a macro tag for each variable assignment and each define block. */ +static void findMakeTags (void) +{ + vString *name = vStringNew (); + boolean newline = TRUE; + boolean in_define = FALSE; + boolean in_rule = FALSE; + boolean variable_possible = TRUE; + int c; + + while ((c = nextChar ()) != EOF) + { + if (newline) + { + if (in_rule) + { + if (c == '\t') + { + skipLine (); /* skip rule */ + continue; + } + else + in_rule = FALSE; + } +
variable_possible = (boolean)(!in_rule); + newline = FALSE; + } + if (c == '\n') + newline = TRUE; + else if (isspace (c)) + continue; + else if (c == '#') /* a comment runs to the end of the line */ + skipLine (); + else if (c == '(') + skipToMatch ("()"); + else if (c == '{') + skipToMatch ("{}"); + else if (c == ':') + { + variable_possible = TRUE; + in_rule = TRUE; + } + else if (variable_possible && isIdentifier (c)) + { + readIdentifier (c, name); + if (strcmp (vStringValue (name), "endef") == 0) + in_define = FALSE; + else if (in_define) + skipLine (); + else if (strcmp (vStringValue (name), "define") == 0 && + isIdentifier (c)) /* NOTE(review): c still holds the first character of the word just read, so this test looks always true here */ + { + in_define = TRUE; + c = skipToNonWhite (); + readIdentifier (c, name); + makeSimpleTag (name, MakeKinds, K_MACRO); + skipLine (); + } + else { + if (strcmp(vStringValue (name), "export") == 0 && + isIdentifier (c)) + { + c = skipToNonWhite (); + readIdentifier (c, name); + } + c = skipToNonWhite (); + if (strchr (":?+", c) != NULL) + { + boolean append = (boolean)(c == '+'); + if (c == ':') + in_rule = TRUE; + c = nextChar (); + if (c != '=') + fileUngetc (c); + else if (append) /* an appending assignment is skipped without making a tag */ + { + skipLine (); + continue; + } + } + if (c == '=') /* reached for a plain, colon or question-mark assignment */ + { + makeSimpleTag (name, MakeKinds, K_MACRO); + in_rule = FALSE; + skipLine (); + } + } + } + else + variable_possible = FALSE; + } + vStringDelete (name); +} +/* Builds the parser definition named Make: it claims the listed makefile name patterns and the mak and mk extensions, exposes MakeKinds and uses findMakeTags as its parser. */ +extern parserDefinition* MakefileParser (void) +{ + static const char *const patterns [] = { "[Mm]akefile", "GNUmakefile", NULL }; + static const char *const extensions [] = { "mak", "mk", NULL }; + parserDefinition* const def = parserNew ("Make"); + def->kinds = MakeKinds; + def->kindCount = KIND_COUNT (MakeKinds); + def->patterns = patterns; + def->extensions = extensions; + def->parser = findMakeTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/matlab.c b/matlab.c new file mode 100644 index 0000000..0811457 --- /dev/null +++ b/matlab.c @@ -0,0 +1,44 @@ +/* +* $Id$ +* +* Copyright (c) 2008, David Fishburn +* +* This source code is released for free
distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for MATLAB language files. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <string.h> +#include "parse.h" + +/* +* FUNCTION DEFINITIONS +*/ +/* Registers the three regular expressions that recognise MATLAB function headers; each one tags the captured function name with kind f. */ +static void installMatLabRegex (const langType language) +{ + /* function [x,y,z] = asdf */ + addTagRegex (language, "^function[ \t]*\\[.*\\][ \t]*=[ \t]*([a-zA-Z0-9_]+)", "\\1", "f,function", NULL); + /* function x = asdf */ + addTagRegex (language, "^function[ \t]*[a-zA-Z0-9_]+[ \t]*=[ \t]*([a-zA-Z0-9_]+)", "\\1", "f,function", NULL); + /* function asdf */ + addTagRegex (language, "^function[ \t]*([a-zA-Z0-9_]+)[^=]*$", "\\1", "f,function", NULL); +} +/* Builds the regex-driven parser definition named MatLab for files with the m extension; installMatLabRegex is its initializer. */ +extern parserDefinition* MatLabParser (void) +{ + static const char *const extensions [] = { "m", NULL }; + parserDefinition* const def = parserNew ("MatLab"); + def->extensions = extensions; + def->initialize = installMatLabRegex; + def->regex = TRUE; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/mk_bc3.mak b/mk_bc3.mak new file mode 100644 index 0000000..e4da935 --- /dev/null +++ b/mk_bc3.mak @@ -0,0 +1,46 @@ +# $Id: mk_bc3.mak 278 2003-02-24 02:27:53Z darren $ +# +# Simple makefile for Borland C++ 3.1 + +!include source.mak + +# Adjust the paths to your location of the borland C files +BCCLOC = c:\borlandc +CC = $(BCCLOC)\bin\bcc +INC = -I$(BCCLOC)\include +LIB = -L$(BCCLOC)\lib + +# Add this file for wildcard expansion (does NOT work with 4.0!) +#EXTRA = $(BCCLOC)\lib\wildargs.obj + +# The following compile options can be changed for better machines.
+# replace -1- with -2 to produce code for a 80286 or higher +# replace -1- with -3 to produce code for a 80386 or higher +# add -v for source debugging +OPTIMIZE= -1- -O1 + +CFLAGS = -DMSDOS -ml -d -w-ccc -w-par -w-pia -w-rch -w-sus $(INC) +LFLAGS = $(LIB) $(EXTRA) +EXTRA_LIBS = + +ctags.exe: $(SOURCES) respbc3 + $(CC) $(OPTIMIZE) -e$@ @respbc3 + +debug: dctags.exe + +dctags.exe: $(SOURCES) respbc3 debug.c + $(CC) -DDEBUG -v -e$@ @respbc3 debug.c + +respbc3: mk_bc3.mak + copy &&| +$(CFLAGS) +$(LFLAGS) +$(SOURCES) +$(EXTRA_LIBS) +| $@ + +clean: + del *.exe + del *.obj + del respbc3 + del tags diff --git a/mk_bc5.mak b/mk_bc5.mak new file mode 100644 index 0000000..20a2946 --- /dev/null +++ b/mk_bc5.mak @@ -0,0 +1,49 @@ +# $Id: mk_bc5.mak 623 2007-09-10 02:52:22Z dhiebert $ +# +# Makefile for Win32 using Borland C++ compiler, version 5.5 (free version) + +!include source.mak + +REGEX_DEFINE = -DHAVE_REGCOMP -DREGEX_MALLOC -DSTDC_HEADERS=1 +DEFINES = -DWIN32 $(REGEX_DEFINE) +INCLUDES = -I. -Ignu_regex +WARNINGS = -w-aus -w-par -w-pia -w-pro -w-sus +CFLAGS = -d -DSTRICT -lTpe -lap +BCC = bcc32 + +# Optimizations if your platform supports all of them. 
+OPT = -O2 -OS -lGt + +# Allows multithreading +#MT_OPT = -tWM -lcw32mt + +ctags: ctags.exe + +ctags.exe: respbc5 + $(BCC) $(OPT) $(MT_OPT) -e$@ $(LDFLAGS) @respbc5 + +readtags.exe: readtags.c + $(BCC) $(CFLAGS) $(OPT) $(MT_OPT) -e$@ $(DEFINES) -DREADTAGS_MAIN readtags.c $(LDFLAGS) + +# Debug version +dctags.exe: respbc5 + $(BCC) -DDEBUG -e$@ $(LDFLAGS) @respbc5 debug.c + +regex.obj: + $(BCC) -c -o$@ -w- $(DEFINES) -Dconst= $(INCLUDES) + +respbc5: $(SOURCES) $(REGEX_SOURCES) $(HEADERS) $(REGEX_HEADERS) mk_bc5.mak + echo $(DEFINES) $(INCLUDES) > $@ + echo $(WARNINGS) >> $@ + echo $(CFLAGS) >> $@ + echo $(SOURCES) $(REGEX_SOURCES) >> $@ + +mostlyclean: + - del *.obj + - del *.tds + - del dctags.exe + - del respbc5 + - del tags + +clean: mostlyclean + - del ctags.exe diff --git a/mk_djg.mak b/mk_djg.mak new file mode 100644 index 0000000..8ea9313 --- /dev/null +++ b/mk_djg.mak @@ -0,0 +1,18 @@ +# $Id: mk_djg.mak 307 2003-03-31 04:53:22Z darren $ +# +# The most simplistic Makefile, for DJGPP Version 2 on Windows +# +# Rather than using this makefile, it is preferable to run "configure", then +# "make" under BASH on DJGPP (i.e. the standard means of building a package on +# Unix), but you have to have a fuller complement of DJGPP packages installed +# to do this. 
+ +include source.mak + +CFLAGS = -O2 -Wall -DMSDOS + +ctags.exe: $(SOURCES) + gcc $(CFLAGS) -s -o ctags.exe $(SOURCES) -lpc + +clean: + del ctags.exe diff --git a/mk_manx.mak b/mk_manx.mak new file mode 100644 index 0000000..e1f513c --- /dev/null +++ b/mk_manx.mak @@ -0,0 +1,65 @@ +# $Id: mk_manx.mak 264 2003-02-13 02:59:30Z darren $ +# +# Makefile for ctags on the Amiga, using Aztec/Manx C 5.0 or later + +OBJEXT = o + +OBJECTS = \ + args.$(OBJEXT) \ + asm.$(OBJEXT) \ + asp.$(OBJEXT) \ + awk.$(OBJEXT) \ + beta.$(OBJEXT) \ + c.$(OBJEXT) \ + cobol.$(OBJEXT) \ + eiffel.$(OBJEXT) \ + entry.$(OBJEXT) \ + erlang.$(OBJEXT) \ + fortran.$(OBJEXT) \ + get.$(OBJEXT) \ + keyword.$(OBJEXT) \ + lisp.$(OBJEXT) \ + lregex.$(OBJEXT) \ + lua.$(OBJEXT) \ + main.$(OBJEXT) \ + make.$(OBJEXT) \ + options.$(OBJEXT) \ + parse.$(OBJEXT) \ + pascal.$(OBJEXT) \ + perl.$(OBJEXT) \ + php.$(OBJEXT) \ + python.$(OBJEXT) \ + read.$(OBJEXT) \ + rexx.$(OBJEXT) \ + routines.$(OBJEXT) \ + ruby.$(OBJEXT) \ + scheme.$(OBJEXT) \ + sh.$(OBJEXT) \ + slang.$(OBJEXT) \ + sort.$(OBJEXT) \ + sml.$(OBJEXT) \ + sql.$(OBJEXT) \ + strlist.$(OBJEXT) \ + tcl.$(OBJEXT) \ + verilog.$(OBJEXT) \ + vim.$(OBJEXT) \ + vstring.$(OBJEXT) \ + yacc.$(OBJEXT) + +CC = cc + +#>>>>> choose between debugging (-bs) or optimizing (-so) +OPTIONS = -so +#OPTIONS = -bs + +#>>>>>> choose -g for debugging +LN_DEBUG = +#LN_DEBUG = -g + +CFLAGS = $(OPTIONS) -wapruq -ps -qf -DAMIGA -Dconst= + +Ctags: $(OBJECTS) + ln +q -m $(LN_DEBUG) -o Ctags $(OBJECTS) -lc16 -lm16 + +.c.o: + $(CC) $(CFLAGS) -o $*.o $*.c diff --git a/mk_mingw.mak b/mk_mingw.mak new file mode 100644 index 0000000..32a61ae --- /dev/null +++ b/mk_mingw.mak @@ -0,0 +1,31 @@ +# $Id: mk_mingw.mak 723 2009-07-09 20:53:19Z dhiebert $ +# +# Makefile for Exuberant Ctags under Win32 with MinGW compiler +# + +include source.mak + +REGEX_DEFINES = -DHAVE_REGCOMP -D__USE_GNU -Dbool=int -Dfalse=0 -Dtrue=1 -Dstrcasecmp=stricmp + +CFLAGS = -Wall +DEFINES = -DWIN32 $(REGEX_DEFINES) 
+INCLUDES = -I. -Ignu_regex +CC = gcc + +ctags.exe: OPT = -O4 +dctags.exe: OPT = -g +dctags.exe: DEBUG = -DDEBUG +dctags.exe: SOURCES += debug.c + +ctags: ctags.exe + +ctags.exe dctags.exe: $(SOURCES) $(REGEX_SOURCES) $(HEADERS) $(REGEX_HEADERS) + $(CC) $(OPT) $(CFLAGS) $(DEFINES) $(INCLUDES) -o $@ $(SOURCES) $(REGEX_SOURCES) + +readtags.exe: readtags.c + $(CC) $(OPT) $(CFLAGS) -DREADTAGS_MAIN $(DEFINES) $(INCLUDES) -o $@ $< + +clean: + - rm -f ctags.exe + - rm -f dctags.exe + - rm -f tags diff --git a/mk_mpw.mak b/mk_mpw.mak new file mode 100644 index 0000000..20c49e5 --- /dev/null +++ b/mk_mpw.mak @@ -0,0 +1,130 @@ +# $Id: mk_mpw.mak 264 2003-02-13 02:59:30Z darren $ +# +# Makefile for Macintosh using MPW +# +# Created by: Maarten L. Hekkelman + +HEADERS = ¶ + args.h ctags.h debug.h entry.h general.h get.h keyword.h ¶ + main.h options.h parse.h parsers.h read.h routines.h sort.h ¶ + strlist.h vstring.h mac.h + +SOURCES = ¶ + args.c ¶ + asm.c ¶ + asp.c ¶ + awk.c ¶ + beta.c ¶ + c.c ¶ + cobol.c ¶ + eiffel.c ¶ + entry.c ¶ + erlang.c ¶ + fortran.c ¶ + get.c ¶ + keyword.c ¶ + lisp.c ¶ + lregex.c ¶ + lua.c ¶ + main.c ¶ + make.c ¶ + options.c ¶ + parse.c ¶ + pascal.c ¶ + perl.c ¶ + php.c ¶ + python.c ¶ + read.c ¶ + rexx.c ¶ ¶ + routines.c ¶ + ruby.c ¶ + scheme.c ¶ + sh.c ¶ + slang.c ¶ + sort.c ¶ + sml.c ¶ + sql.c ¶ + strlist.c ¶ + tcl.c ¶ + verilog.c ¶ + vim.c ¶ + vstring.c ¶ + yacc.c ¶ + mac.c + +OBJ = ¶ + args.o ¶ + asm.o ¶ + asp.o ¶ + awk.o ¶ + beta.o ¶ + c.o ¶ + cobol.o ¶ + eiffel.o ¶ + entry.o ¶ + erlang.o ¶ + fortran.o ¶ + get.o ¶ + keyword.o ¶ + lisp.o ¶ + lregex.o ¶ + lua.o ¶ + main.o ¶ + make.o ¶ + options.o ¶ + parse.o ¶ + pascal.o ¶ + perl.o ¶ + php.o ¶ + python.o ¶ + read.o ¶ + rexx.o ¶ + routines.o ¶ + ruby.o ¶ + scheme.o ¶ + sh.o ¶ + slang.o ¶ + sort.o ¶ + sml.o ¶ + sql.o ¶ + strlist.o ¶ + tcl.o ¶ + verilog.o ¶ + vim.o ¶ + vstring.o ¶ + yacc.o ¶ + mac.o + +LIBS = ¶ + {PPCLibraries}PPCToolLibs.o ¶ + {SharedLibraries}MathLib ¶ + {SharedLibraries}InterfaceLib 
¶ + {SharedLibraries}StdCLib ¶ + {MWPPCLibraries}'MSL StdCRuntime.Lib' + +CC = mwcppc +LD = mwlinkppc + +# Using -nodefaults to avoid having {MWCIncludes} in our include paths +# Needed since we're building a MPW Tool and not an application. +COptions = -nodefaults -i : -i- -i {CIncludes} -opt full +LOptions = -xm m -stacksize 128 + +all Ä CTags + +CTags Ä TurnOfEcho {OBJ} + {LD} {LOptions} -o CTags {OBJ} {LIBS} + +{OBJ} Ä {HEADERS} + +tags Ä CTags + :CTags -p. {SOURCES} {HEADERS} + +clean Ä + Delete -y -i {OBJ} {CTags} tags + +.o Ä .c + {CC} {depDir}{default}.c -o {targDir}{default}.o {COptions} + +TurnOfEcho Ä + set echo 0 diff --git a/mk_mvc.mak b/mk_mvc.mak new file mode 100644 index 0000000..80a128e --- /dev/null +++ b/mk_mvc.mak @@ -0,0 +1,40 @@ +# $Id: mk_mvc.mak 724 2009-07-09 20:54:01Z dhiebert $ +# +# Makefile for Win32 using Microsoft Visual C++ compiler + +include source.mak + +REGEX_DEFINES = -DHAVE_REGCOMP -D__USE_GNU -Dbool=int -Dfalse=0 -Dtrue=1 -Dstrcasecmp=stricmp +DEFINES = -DWIN32 $(REGEX_DEFINES) +INCLUDES = -I. 
-Ignu_regex +OPT = /O2 + +ctags: ctags.exe + +ctags.exe: respmvc + cl $(OPT) /Fe$@ @respmvc /link setargv.obj + +readtags.exe: readtags.c + cl /clr $(OPT) /Fe$@ $(DEFINES) -DREADTAGS_MAIN readtags.c /link setargv.obj + +# Debug version +dctags.exe: respmvc + cl /Zi -DDEBUG /Fe$@ @respmvc debug.c /link setargv.obj + +regex.obj: + cl /c $(OPT) /Fo$@ $(INCLUDES) $(DEFINES) gnu_regex/regex.c + +respmvc: $(SOURCES) $(REGEX_SOURCES) $(HEADERS) $(REGEX_HEADERS) mk_mvc.mak + echo $(DEFINES) > $@ + echo $(INCLUDES) >> $@ + echo $(SOURCES) >> $@ + echo $(REGEX_SOURCES) >> $@ + +mostlyclean: + - del *.obj + - del dctags.exe + - del respmvc + - del tags + +clean: mostlyclean + - del ctags.exe diff --git a/mk_os2.mak b/mk_os2.mak new file mode 100644 index 0000000..403a496 --- /dev/null +++ b/mk_os2.mak @@ -0,0 +1,104 @@ +# $Id: mk_os2.mak 74 2002-01-27 21:20:55Z darren $ +# +# A Makefile for OS/2 using EMX/gcc +# You may want to use the OS/2 port of autoconf for building +# and comment-out the according statements in this Makefile. +# You need a library to provide regex support. +# libExt might do this, but currently (2/2001) it doesn't work well +# together with ctags ... +# +# Provided and supported by +# Alexander Mai +# or + +default: + @echo "Enter $(MAKE) -f mk_os2.mak target" + @echo "where target is one of:" + @echo " small (small executable req. 
EMX runtime)" + @echo " debug (executable for debugging purposes)" + @echo " release (stand-alone executable)" + @echo " clean (remove all files built)" + +# Use this to create a small binary +# (requires EMX runtime libraries) +small: + $(MAKE) -f mk_os2.mak all \ + CC="gcc" \ + CFLAGS="-O5 -mpentium -Wall" \ + LFLAGS="-Zcrtdll -s" \ + LIBS="-lregex" \ + OBJEXT="o" + +# Use this to create a binary for debugging purposes +# (requires EMX runtime libraries) +debug: + $(MAKE) -f mk_os2.mak all \ + CC="gcc" \ + CFLAGS="-O0 -Wall -g" \ + LFLAGS="-Zcrtdll -g" \ + LIBS="-lregex" \ + OBJEXT="o" + +# Use this to create a stand-alone binary for distribution +# (requires link386 for linking but no EMX runtime libraries) +release: + $(MAKE) -f mk_os2.mak all \ + CC="gcc" \ + CFLAGS="-g -O5 -mpentium -Wall" \ + LFLAGS="-s -Zomf -Zsys -Zlinker /PM:VIO" \ + LIBS="-lregex" \ + OBJEXT="obj" + +# Use the line below if you have created config.h +# (manually or by running configure) +# Otherwise use built-in defaults (#ifdef OS2)! +# DEFINES=-DHAVE_CONFIG_H +DEFINES=-DOS2 + + +# General rules and definitions + +.SUFFIXES: .c .exe .h .o .obj + +include source.mak + +all: ctags.exe readtags.exe readtags.a readtags.lib +ctags: ctags.exe +etags: etags.exe + +ctags.exe: $(OBJECTS) + $(CC) $(CFLAGS) $(LFLAGS) -o $@ $^ $(LIBS) + +etags.exe: ctags.exe + @copy $< $@ + +# The readtags executable +readtags.exe: readtags.c + $(CC) $(CFLAGS) $(DEFINES) $(LFLAGS) -DREADTAGS_MAIN -o $@ $^ $(LIBS) + +# We build a.out and omf version of the readtags library +readtags.o: readtags.c + $(CC) $(CFLAGS) $(DEFINES) -c -o $@ $^ + +readtags.a: readtags.o + if exist $@ del $@ + ar rc $@ $^ + +readtags.lib: readtags.a + emxomf $< + +.c.o: + $(CC) $(CFLAGS) $(DEFINES) -I. -c $< -o $@ + +.c.obj: + $(CC) $(CFLAGS) -Zomf $(DEFINES) -I. 
-c $< -o $@ + +# Delete all files that are not part of the source distribution +clean: + @if exist ctags.exe del ctags.exe + @if exist etags.exe del etags.exe + @if exist readtags.exe del readtags.exe + @if exist readtags.a del readtags.a + @if exist readtags.lib del readtags.lib + @if exist *.obj del *.obj + @if exist *.o del *.o diff --git a/mk_qdos.mak b/mk_qdos.mak new file mode 100644 index 0000000..b069040 --- /dev/null +++ b/mk_qdos.mak @@ -0,0 +1,100 @@ +# $Id: mk_qdos.mak 264 2003-02-13 02:59:30Z darren $ +# +# Makefile for ctags on QDOS/SMS systems and C68 v4.24 +# Submitted by Thierry Godefroy + +# Directories: + +T = ram1_ +P = drv1_C68_ + +# Programs name: + +CC = $(P)cc +AS = $(P)as68 +ASM = $(P)qmac +LD = $(P)ld + +# Programs flags: + +CCFLAGS = -tmp$(T) -v -Y$(P) -I$(P)include_ -O +ASFLAGS = -V +ASMFLAGS = -nolist +LDFLAGS = -v -L$(P)lib_ -bufp150K\ + +# Target name: + +EXEC = ctags + +# Additional libraries: + +LIBS = + +# Target dependencies: + +OBJEXT = o + +HEADERS = e_qdos.h \ + args.h ctags.h debug.h entry.h general.h get.h keyword.h \ + main.h options.h parse.h parsers.h read.h routines.h sort.h \ + strlist.h vstring.h + +OBJECTS = qdos.$(OBJEXT) \ + args.$(OBJEXT) \ + asm.$(OBJEXT) \ + asp.$(OBJEXT) \ + awk.$(OBJEXT) \ + beta.$(OBJEXT) \ + c.$(OBJEXT) \ + cobol.$(OBJEXT) \ + eiffel.$(OBJEXT) \ + entry.$(OBJEXT) \ + erlang.$(OBJEXT) \ + fortran.$(OBJEXT) \ + get.$(OBJEXT) \ + keyword.$(OBJEXT) \ + lisp.$(OBJEXT) \ + lregex.$(OBJEXT) \ + lua.$(OBJEXT) \ + main.$(OBJEXT) \ + make.$(OBJEXT) \ + options.$(OBJEXT) \ + parse.$(OBJEXT) \ + pascal.$(OBJEXT) \ + perl.$(OBJEXT) \ + php.$(OBJEXT) \ + python.$(OBJEXT) \ + read.$(OBJEXT) \ + rexx.$(OBJEXT) \ + routines.$(OBJEXT) \ + ruby.$(OBJEXT) \ + scheme.$(OBJEXT) \ + sh.$(OBJEXT) \ + slang.$(OBJEXT) \ + sort.$(OBJEXT) \ + sml.$(OBJEXT) \ + sql.$(OBJEXT) \ + strlist.$(OBJEXT) \ + tcl.$(OBJEXT) \ + verilog.$(OBJEXT) \ + vim.$(OBJEXT) \ + vstring.$(OBJEXT) \ + yacc.$(OBJEXT) + +$(EXEC) : $(OBJECTS) + 
$(LD) -o$(EXEC) $(LDFLAGS) $(OBJECTS) $(LIBS) + +$(OBJECTS): $(HEADERS) + +# Construction rules: + +_c_o : + $(CC) -c $(CCFLAGS) $< + +_s_o : + $(AS) $(ASFLAGS) $< $@ + +_asm_rel : + $(ASM) $< $(ASMFLAGS) + +#end diff --git a/mk_sas.mak b/mk_sas.mak new file mode 100644 index 0000000..476f5a6 --- /dev/null +++ b/mk_sas.mak @@ -0,0 +1,63 @@ +# $Id: mk_sas.mak 264 2003-02-13 02:59:30Z darren $ +# +# Makefile for SAS/C Amiga Compiler +# Submitted by Stefan Haubenthal + +CFLAGS= def AMIGA opt parm r sint + +OBJEXT = o + +OBJECTS = \ + args.$(OBJEXT) \ + asm.$(OBJEXT) \ + asp.$(OBJEXT) \ + awk.$(OBJEXT) \ + beta.$(OBJEXT) \ + c.$(OBJEXT) \ + cobol.$(OBJEXT) \ + eiffel.$(OBJEXT) \ + entry.$(OBJEXT) \ + erlang.$(OBJEXT) \ + fortran.$(OBJEXT) \ + get.$(OBJEXT) \ + keyword.$(OBJEXT) \ + lisp.$(OBJEXT) \ + lregex.$(OBJEXT) \ + lua.$(OBJEXT) \ + main.$(OBJEXT) \ + make.$(OBJEXT) \ + options.$(OBJEXT) \ + parse.$(OBJEXT) \ + pascal.$(OBJEXT) \ + perl.$(OBJEXT) \ + php.$(OBJEXT) \ + python.$(OBJEXT) \ + read.$(OBJEXT) \ + rexx.$(OBJEXT) \ + routines.$(OBJEXT) \ + ruby.$(OBJEXT) \ + scheme.$(OBJEXT) \ + sh.$(OBJEXT) \ + slang.$(OBJEXT) \ + sort.$(OBJEXT) \ + sml.$(OBJEXT) \ + sql.$(OBJEXT) \ + strlist.$(OBJEXT) \ + tcl.$(OBJEXT) \ + verilog.$(OBJEXT) \ + vim.$(OBJEXT) \ + vstring.$(OBJEXT) \ + yacc.$(OBJEXT) + +ctags: $(OBJECTS) + sc link to $@ $(OBJECTS) math s sint + +.c.o: + $(CC) $(CFLAGS) -o $*.o $*.c + +clean: + -delete $(OBJECTS) ctags.lnk + +archive: clean + @-delete force RAM:ctags.lha + lha -r a RAM:ctags // ctags diff --git a/mkinstalldirs b/mkinstalldirs new file mode 100755 index 0000000..b937de2 --- /dev/null +++ b/mkinstalldirs @@ -0,0 +1,40 @@ +#! 
/bin/sh +# mkinstalldirs --- make directory hierarchy +# Author: Noah Friedman +# Created: 1993-05-16 +# Public domain + +# $Id: mkinstalldirs 2 2001-11-02 04:53:43Z darren $ + +errstatus=0 + +for file +do + set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` + shift + + pathcomp= + for d + do + pathcomp="$pathcomp$d" + case "$pathcomp" in + -* ) pathcomp=./$pathcomp ;; + esac + + if test ! -d "$pathcomp"; then + echo "mkdir $pathcomp" 1>&2 + + mkdir "$pathcomp" || lasterr=$? + + if test ! -d "$pathcomp"; then + errstatus=$lasterr + fi + fi + + pathcomp="$pathcomp/" + done +done + +exit $errstatus + +# mkinstalldirs ends here diff --git a/ocaml.c b/ocaml.c new file mode 100644 index 0000000..8fd6872 --- /dev/null +++ b/ocaml.c @@ -0,0 +1,1842 @@ +/* +* Copyright (c) 2009, Vincent Berthoux +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Objective Caml +* language files. 
+*/ +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "keyword.h" +#include "entry.h" +#include "options.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* To get rid of unused parameter warning in + * -Wextra */ +#ifdef UNUSED +#elif defined(__GNUC__) +# define UNUSED(x) UNUSED_ ## x __attribute__((unused)) +#elif defined(__LCLINT__) +# define UNUSED(x) /*@unused@*/ x +#else +# define UNUSED(x) x +#endif +#define OCAML_MAX_STACK_SIZE 256 + +typedef enum { + K_CLASS, /* Ocaml class, relatively rare */ + K_METHOD, /* class method */ + K_MODULE, /* Ocaml module OR functor */ + K_VAR, + K_TYPE, /* name of an OCaml type */ + K_FUNCTION, + K_CONSTRUCTOR, /* Constructor of a sum type */ + K_RECORDFIELD, + K_EXCEPTION +} ocamlKind; + +static kindOption OcamlKinds[] = { + {TRUE, 'c', "class", "classes"}, + {TRUE, 'm', "method", "Object's method"}, + {TRUE, 'M', "module", "Module or functor"}, + {TRUE, 'v', "var", "Global variable"}, + {TRUE, 't', "type", "Type name"}, + {TRUE, 'f', "function", "A function"}, + {TRUE, 'C', "Constructor", "A constructor"}, + {TRUE, 'r', "Record field", "A 'structure' field"}, + {TRUE, 'e', "Exception", "An exception"} +}; + +typedef enum { + OcaKEYWORD_and, + OcaKEYWORD_begin, + OcaKEYWORD_class, + OcaKEYWORD_do, + OcaKEYWORD_done, + OcaKEYWORD_else, + OcaKEYWORD_end, + OcaKEYWORD_exception, + OcaKEYWORD_for, + OcaKEYWORD_functor, + OcaKEYWORD_fun, + OcaKEYWORD_if, + OcaKEYWORD_in, + OcaKEYWORD_let, + OcaKEYWORD_match, + OcaKEYWORD_method, + OcaKEYWORD_module, + OcaKEYWORD_mutable, + OcaKEYWORD_object, + OcaKEYWORD_of, + OcaKEYWORD_rec, + OcaKEYWORD_sig, + OcaKEYWORD_struct, + OcaKEYWORD_then, + OcaKEYWORD_try, + OcaKEYWORD_type, + OcaKEYWORD_val, + OcaKEYWORD_virtual, + OcaKEYWORD_while, + OcaKEYWORD_with, + + OcaIDENTIFIER, + Tok_PARL, /* '(' */ + Tok_PARR, /* ')' */ + Tok_BRL, /* '[' */ + Tok_BRR, /* ']' */ + Tok_CurlL, /* '{' */ + Tok_CurlR, /* '}' */ + Tok_Prime, /* 
'\'' */ + Tok_Pipe, /* '|' */ + Tok_EQ, /* '=' */ + Tok_Val, /* string/number/poo */ + Tok_Op, /* any operator recognized by the language */ + Tok_semi, /* ';' */ + Tok_comma, /* ',' */ + Tok_To, /* '->' */ + Tok_Sharp, /* '#' */ + Tok_Backslash, /* '\\' */ + + Tok_EOF /* END of file */ +} ocamlKeyword; + +typedef struct sOcaKeywordDesc { + const char *name; + ocamlKeyword id; +} ocaKeywordDesc; + +typedef ocamlKeyword ocaToken; + +static const ocaKeywordDesc OcamlKeywordTable[] = { + { "and" , OcaKEYWORD_and }, + { "begin" , OcaKEYWORD_begin }, + { "class" , OcaKEYWORD_class }, + { "do" , OcaKEYWORD_do }, + { "done" , OcaKEYWORD_done }, + { "else" , OcaKEYWORD_else }, + { "end" , OcaKEYWORD_end }, + { "exception" , OcaKEYWORD_exception }, + { "for" , OcaKEYWORD_for }, + { "fun" , OcaKEYWORD_fun }, + { "function" , OcaKEYWORD_fun }, + { "functor" , OcaKEYWORD_functor }, + { "in" , OcaKEYWORD_in }, + { "let" , OcaKEYWORD_let }, + { "match" , OcaKEYWORD_match }, + { "method" , OcaKEYWORD_method }, + { "module" , OcaKEYWORD_module }, + { "mutable" , OcaKEYWORD_mutable }, + { "object" , OcaKEYWORD_object }, + { "of" , OcaKEYWORD_of }, + { "rec" , OcaKEYWORD_rec }, + { "sig" , OcaKEYWORD_sig }, + { "struct" , OcaKEYWORD_struct }, + { "then" , OcaKEYWORD_then }, + { "try" , OcaKEYWORD_try }, + { "type" , OcaKEYWORD_type }, + { "val" , OcaKEYWORD_val }, + { "value" , OcaKEYWORD_let }, /* just to handle revised syntax */ + { "virtual" , OcaKEYWORD_virtual }, + { "while" , OcaKEYWORD_while }, + { "with" , OcaKEYWORD_with }, + + { "or" , Tok_Op }, + { "mod " , Tok_Op }, + { "land " , Tok_Op }, + { "lor " , Tok_Op }, + { "lxor " , Tok_Op }, + { "lsl " , Tok_Op }, + { "lsr " , Tok_Op }, + { "asr" , Tok_Op }, + { "->" , Tok_To }, + { "true" , Tok_Val }, + { "false" , Tok_Val } +}; + +static langType Lang_Ocaml; + +boolean exportLocalInfo = FALSE; + +/*////////////////////////////////////////////////////////////////// +//// lexingInit */ +typedef struct _lexingState { + vString 
*name; /* current parsed identifier/operator */ + const unsigned char *cp; /* position in stream */ +} lexingState; + +/* array of the size of all possible value for a char */ +boolean isOperator[1 << (8 * sizeof (char))] = { FALSE }; + +static void initKeywordHash ( void ) +{ + const size_t count = sizeof (OcamlKeywordTable) / sizeof (ocaKeywordDesc); + size_t i; + + for (i = 0; i < count; ++i) + { + addKeyword (OcamlKeywordTable[i].name, Lang_Ocaml, + (int) OcamlKeywordTable[i].id); + } +} + +/* definition of all the operator in OCaml, + * /!\ certain operator get special treatment + * in regards of their role in OCaml grammar : + * '|' ':' '=' '~' and '?' */ +static void initOperatorTable ( void ) +{ + isOperator['!'] = TRUE; + isOperator['$'] = TRUE; + isOperator['%'] = TRUE; + isOperator['&'] = TRUE; + isOperator['*'] = TRUE; + isOperator['+'] = TRUE; + isOperator['-'] = TRUE; + isOperator['.'] = TRUE; + isOperator['/'] = TRUE; + isOperator[':'] = TRUE; + isOperator['<'] = TRUE; + isOperator['='] = TRUE; + isOperator['>'] = TRUE; + isOperator['?'] = TRUE; + isOperator['@'] = TRUE; + isOperator['^'] = TRUE; + isOperator['~'] = TRUE; + isOperator['|'] = TRUE; +} + +/*////////////////////////////////////////////////////////////////////// +//// Lexing */ +static boolean isNum (char c) +{ + return c >= '0' && c <= '9'; +} +static boolean isLowerAlpha (char c) +{ + return c >= 'a' && c <= 'z'; +} + +static boolean isUpperAlpha (char c) +{ + return c >= 'A' && c <= 'Z'; +} + +static boolean isAlpha (char c) +{ + return isLowerAlpha (c) || isUpperAlpha (c); +} + +static boolean isIdent (char c) +{ + return isNum (c) || isAlpha (c) || c == '_' || c == '\''; +} + +static boolean isSpace (char c) +{ + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +} + +static void eatWhiteSpace (lexingState * st) +{ + const unsigned char *cp = st->cp; + while (isSpace (*cp)) + cp++; + + st->cp = cp; +} + +static void eatString (lexingState * st) +{ + boolean lastIsBackSlash = 
FALSE; + boolean unfinished = TRUE; + const unsigned char *c = st->cp + 1; + + while (unfinished) + { + /* end of line should never happen. + * we tolerate it */ + if (c == NULL || c[0] == '\0') + break; + else if (*c == '"' && !lastIsBackSlash) + unfinished = FALSE; + else + lastIsBackSlash = *c == '\\'; + + c++; + } + + st->cp = c; +} + +static void eatComment (lexingState * st) +{ + boolean unfinished = TRUE; + boolean lastIsStar = FALSE; + const unsigned char *c = st->cp + 2; + + while (unfinished) + { + /* we've reached the end of the line.. + * so we have to reload a line... */ + if (c == NULL || *c == '\0') + { + st->cp = fileReadLine (); + /* WOOPS... no more input... + * we return, next lexing read + * will be null and ok */ + if (st->cp == NULL) + return; + c = st->cp; + continue; + } + /* we've reached the end of the comment */ + else if (*c == ')' && lastIsStar) + unfinished = FALSE; + /* here we deal with imbricated comment, which + * are allowed in OCaml */ + else if (c[0] == '(' && c[1] == '*') + { + st->cp = c; + eatComment (st); + c = st->cp; + lastIsStar = FALSE; + } + else + lastIsStar = '*' == *c; + + c++; + } + + st->cp = c; +} + +static void readIdentifier (lexingState * st) +{ + const unsigned char *p; + vStringClear (st->name); + + /* first char is a simple letter */ + if (isAlpha (*st->cp) || *st->cp == '_') + vStringPut (st->name, (int) *st->cp); + + /* Go till you get identifier chars */ + for (p = st->cp + 1; isIdent (*p); p++) + vStringPut (st->name, (int) *p); + + st->cp = p; + + vStringTerminate (st->name); +} + +static ocamlKeyword eatNumber (lexingState * st) +{ + while (isNum (*st->cp)) + st->cp++; + return Tok_Val; +} + +/* Operator can be defined in OCaml as a function + * so we must be ample enough to parse them normally */ +static ocamlKeyword eatOperator (lexingState * st) +{ + int count = 0; + const unsigned char *root = st->cp; + + vStringClear (st->name); + + while (isOperator[st->cp[count]]) + { + vStringPut (st->name, 
st->cp[count]); + count++; + } + + vStringTerminate (st->name); + + st->cp += count; + if (count <= 1) + { + switch (root[0]) + { + case '|': + return Tok_Pipe; + case '=': + return Tok_EQ; + default: + return Tok_Op; + } + } + else if (count == 2 && root[0] == '-' && root[1] == '>') + return Tok_To; + else + return Tok_Op; +} + +/* The lexer is in charge of reading the file. + * Some of sub-lexer (like eatComment) also read file. + * lexing is finished when the lexer return Tok_EOF */ +static ocamlKeyword lex (lexingState * st) +{ + int retType; + /* handling data input here */ + while (st->cp == NULL || st->cp[0] == '\0') + { + st->cp = fileReadLine (); + if (st->cp == NULL) + return Tok_EOF; + } + + if (isAlpha (*st->cp)) + { + readIdentifier (st); + retType = lookupKeyword (vStringValue (st->name), Lang_Ocaml); + + if (retType == -1) /* If it's not a keyword */ + { + return OcaIDENTIFIER; + } + else + { + return retType; + } + } + else if (isNum (*st->cp)) + return eatNumber (st); + else if (isSpace (*st->cp)) + { + eatWhiteSpace (st); + return lex (st); + } + /* OCaml permit the definition of our own operators + * so here we check all the consecuting chars which + * are operators to discard them. 
*/ + else if (isOperator[*st->cp]) + return eatOperator (st); + else + switch (*st->cp) + { + case '(': + if (st->cp[1] == '*') /* ergl, a comment */ + { + eatComment (st); + return lex (st); + } + else + { + st->cp++; + return Tok_PARL; + } + + case ')': + st->cp++; + return Tok_PARR; + case '[': + st->cp++; + return Tok_BRL; + case ']': + st->cp++; + return Tok_BRR; + case '{': + st->cp++; + return Tok_CurlL; + case '}': + st->cp++; + return Tok_CurlR; + case '\'': + st->cp++; + return Tok_Prime; + case ',': + st->cp++; + return Tok_comma; + case '=': + st->cp++; + return Tok_EQ; + case ';': + st->cp++; + return Tok_semi; + case '"': + eatString (st); + return Tok_Val; + case '_': + st->cp++; + return Tok_Val; + case '#': + st->cp++; + return Tok_Sharp; + case '\\': + st->cp++; + return Tok_Backslash; + + default: + st->cp++; + break; + } + + /* default return if nothing is recognized, + * shouldn't happen, but at least, it will + * be handled without destroying the parsing. */ + return Tok_Val; +} + +/*////////////////////////////////////////////////////////////////////// +//// Parsing */ +typedef void (*parseNext) (vString * const ident, ocaToken what); + +/********** Helpers */ +/* This variable hold the 'parser' which is going to + * handle the next token */ +parseNext toDoNext; + +/* Special variable used by parser eater to + * determine which action to put after their + * job is finished. */ +parseNext comeAfter; + +/* If a token put an end to current delcaration/ + * statement */ +ocaToken terminatingToken; + +/* Token to be searched by the different + * parser eater. */ +ocaToken waitedToken; + +/* name of the last class, used for + * context stacking. 
*/ +vString *lastClass; + +vString *voidName; + +typedef enum _sContextKind { + ContextStrong, + ContextSoft +} contextKind; + +typedef enum _sContextType { + ContextType, + ContextModule, + ContextClass, + ContextValue, + ContextFunction, + ContextMethod, + ContextBlock +} contextType; + +typedef struct _sOcamlContext { + contextKind kind; /* well if the context is strong or not */ + contextType type; + parseNext callback; /* what to do when a context is pop'd */ + vString *contextName; /* name, if any, of the surrounding context */ +} ocamlContext; + +/* context stack, can be used to output scope information + * into the tag file. */ +ocamlContext stack[OCAML_MAX_STACK_SIZE]; +/* current position in the tag */ +int stackIndex; + +/* special function, often recalled, so putting it here */ +static void globalScope (vString * const ident, ocaToken what); + +/* Return : index of the last named context if one + * is found, -1 otherwise */ +static int getLastNamedIndex ( void ) +{ + int i; + + for (i = stackIndex - 1; i >= 0; --i) + { + if (stack[i].contextName->buffer && + strlen (stack[i].contextName->buffer) > 0) + { + return i; + } + } + + return -1; +} + +static const char *contextDescription (contextType t) +{ + switch (t) + { + case ContextFunction: + return "function"; + case ContextMethod: + return "method"; + case ContextValue: + return "value"; + case ContextModule: + return "Module"; + case ContextType: + return "type"; + case ContextClass: + return "class"; + case ContextBlock: + return "begin/end"; + } + + return NULL; +} + +static char contextTypeSuffix (contextType t) +{ + switch (t) + { + case ContextFunction: + case ContextMethod: + case ContextValue: + case ContextModule: + return '/'; + case ContextType: + return '.'; + case ContextClass: + return '#'; + case ContextBlock: + return ' '; + } + + return '$'; +} + +/* Push a new context, handle null string */ +static void pushContext (contextKind kind, contextType type, parseNext after, + vString const 
*contextName) +{ + int parentIndex; + + if (stackIndex >= OCAML_MAX_STACK_SIZE) + { + verbose ("OCaml Maximum depth reached"); + return; + } + + + stack[stackIndex].kind = kind; + stack[stackIndex].type = type; + stack[stackIndex].callback = after; + + parentIndex = getLastNamedIndex (); + if (contextName == NULL) + { + vStringClear (stack[stackIndex++].contextName); + return; + } + + if (parentIndex >= 0) + { + vStringCopy (stack[stackIndex].contextName, + stack[parentIndex].contextName); + vStringPut (stack[stackIndex].contextName, + contextTypeSuffix (stack[parentIndex].type)); + + vStringCat (stack[stackIndex].contextName, contextName); + } + else + vStringCopy (stack[stackIndex].contextName, contextName); + + stackIndex++; +} + +static void pushStrongContext (vString * name, contextType type) +{ + pushContext (ContextStrong, type, &globalScope, name); +} + +static void pushSoftContext (parseNext continuation, + vString * name, contextType type) +{ + pushContext (ContextSoft, type, continuation, name); +} + +static void pushEmptyContext (parseNext continuation) +{ + pushContext (ContextSoft, ContextValue, continuation, NULL); +} + +/* unroll the stack until the last named context. + * then discard it. Used to handle the : + * let f x y = ... + * in ... + * where the context is reseted after the in. Context may have + * been really nested before that. */ +static void popLastNamed ( void ) +{ + int i = getLastNamedIndex (); + + if (i >= 0) + { + stackIndex = i; + toDoNext = stack[i].callback; + vStringClear (stack[i].contextName); + } + else + { + /* ok, no named context found... + * (should not happen). 
*/ + stackIndex = 0; + toDoNext = &globalScope; + } +} + +/* pop a context without regarding it's content + * (beside handling empty stack case) */ +static void popSoftContext ( void ) +{ + if (stackIndex <= 0) + { + toDoNext = &globalScope; + } + else + { + stackIndex--; + toDoNext = stack[stackIndex].callback; + vStringClear (stack[stackIndex].contextName); + } +} + +/* Reset everything until the last global space. + * a strong context can be : + * - module + * - class definition + * - the initial global space + * - a _global_ delcaration (let at global scope or in a module). + * Created to exit quickly deeply nested context */ +static contextType popStrongContext ( void ) +{ + int i; + + for (i = stackIndex - 1; i >= 0; --i) + { + if (stack[i].kind == ContextStrong) + { + stackIndex = i; + toDoNext = stack[i].callback; + vStringClear (stack[i].contextName); + return stack[i].type; + } + } + /* ok, no strong context found... */ + stackIndex = 0; + toDoNext = &globalScope; + return -1; +} + +/* Ignore everything till waitedToken and jump to comeAfter. + * If the "end" keyword is encountered break, doesn't remember + * why though. 
*/ +static void tillToken (vString * const UNUSED (ident), ocaToken what) +{ + if (what == waitedToken) + toDoNext = comeAfter; + else if (what == OcaKEYWORD_end) + { + popStrongContext (); + toDoNext = &globalScope; + } +} + +/* Ignore everything till a waitedToken is seen, but + * take care of balanced parentheses/bracket use */ +static void contextualTillToken (vString * const UNUSED (ident), ocaToken what) +{ + static int parentheses = 0; + static int bracket = 0; + static int curly = 0; + + switch (what) + { + case Tok_PARL: + parentheses--; + break; + case Tok_PARR: + parentheses++; + break; + case Tok_CurlL: + curly--; + break; + case Tok_CurlR: + curly++; + break; + case Tok_BRL: + bracket--; + break; + case Tok_BRR: + bracket++; + break; + + default: /* other token are ignored */ + break; + } + + if (what == waitedToken && parentheses == 0 && bracket == 0 && curly == 0) + toDoNext = comeAfter; + + else if (what == OcaKEYWORD_end) + { + popStrongContext (); + toDoNext = &globalScope; + } +} + +/* Wait for waitedToken and jump to comeAfter or let + * the globalScope handle declarations */ +static void tillTokenOrFallback (vString * const ident, ocaToken what) +{ + if (what == waitedToken) + toDoNext = comeAfter; + else + globalScope (ident, what); +} + +/* ignore token till waitedToken, or give up if find + * terminatingToken. Use globalScope to handle new + * declarations. 
*/ +static void tillTokenOrTerminatingOrFallback (vString * const ident, + ocaToken what) +{ + if (what == waitedToken) + toDoNext = comeAfter; + else if (what == terminatingToken) + toDoNext = globalScope; + else + globalScope (ident, what); +} + +/* ignore the next token in the stream and jump to the + * given comeAfter state */ +static void ignoreToken (vString * const UNUSED (ident), ocaToken UNUSED (what)) +{ + toDoNext = comeAfter; +} + +/********** Grammar */ +/* the purpose of each function is detailled near their + * implementation */ + +static void killCurrentState ( void ) +{ + + /* Tracking the kind of previous strong + * context, if it doesn't match with a + * really strong entity, repop */ + switch (popStrongContext ()) + { + + case ContextValue: + popStrongContext (); + break; + case ContextFunction: + popStrongContext (); + break; + case ContextMethod: + popStrongContext (); + break; + + case ContextType: + popStrongContext(); + break; + case ContextBlock: + break; + case ContextModule: + break; + case ContextClass: + break; + default: + /* nothing more */ + break; + } +} + +/* used to prepare tag for OCaml, just in case their is a need to + * add additional information to the tag. 
*/ +static void prepareTag (tagEntryInfo * tag, vString const *name, ocamlKind kind) +{ + int parentIndex; + + initTagEntry (tag, vStringValue (name)); + tag->kindName = OcamlKinds[kind].name; + tag->kind = OcamlKinds[kind].letter; + + parentIndex = getLastNamedIndex (); + if (parentIndex >= 0) + { + tag->extensionFields.scope[0] = + contextDescription (stack[parentIndex].type); + tag->extensionFields.scope[1] = + vStringValue (stack[parentIndex].contextName); + } +} + +/* Used to centralise tag creation, and be able to add + * more information to it in the future */ +static void addTag (vString * const ident, int kind) +{ + tagEntryInfo toCreate; + prepareTag (&toCreate, ident, kind); + makeTagEntry (&toCreate); +} + +boolean needStrongPoping = FALSE; +static void requestStrongPoping ( void ) +{ + needStrongPoping = TRUE; +} + +static void cleanupPreviousParser ( void ) +{ + if (needStrongPoping) + { + needStrongPoping = FALSE; + popStrongContext (); + } +} + +/* Due to some circular dependencies, the following functions + * must be forward-declared. */ +static void letParam (vString * const ident, ocaToken what); +static void localScope (vString * const ident, ocaToken what); +static void mayRedeclare (vString * const ident, ocaToken what); +static void typeSpecification (vString * const ident, ocaToken what); + +/* + * Parse a record type + * type ident = // parsed previously + * { + * ident1: type1; + * ident2: type2; + * } + */ +static void typeRecord (vString * const ident, ocaToken what) +{ + switch (what) + { + case OcaIDENTIFIER: + addTag (ident, K_RECORDFIELD); + terminatingToken = Tok_CurlR; + waitedToken = Tok_semi; + comeAfter = &typeRecord; + toDoNext = &tillTokenOrTerminatingOrFallback; + break; + + case OcaKEYWORD_mutable: + /* ignore it */ + break; + + case Tok_CurlR: + popStrongContext (); + toDoNext = &globalScope; + break; + + default: /* don't care */ + break; + } +} + +/* handle : + * exception ExceptionName ... 
*/ +static void exceptionDecl (vString * const ident, ocaToken what) +{ + if (what == OcaIDENTIFIER) + { + addTag (ident, K_EXCEPTION); + } + /* don't know what to do on else... */ + + toDoNext = &globalScope; +} + +tagEntryInfo tempTag; +vString *tempIdent; + +/* Ensure a constructor is not a type path beginning + * with a module */ +static void constructorValidation (vString * const ident, ocaToken what) +{ + switch (what) + { + case Tok_Op: /* if we got a '.' which is an operator */ + toDoNext = &globalScope; + popStrongContext (); + needStrongPoping = FALSE; + break; + + case OcaKEYWORD_of: /* OK, it must be a constructor :) */ + makeTagEntry (&tempTag); + vStringClear (tempIdent); + toDoNext = &tillTokenOrFallback; + comeAfter = &typeSpecification; + waitedToken = Tok_Pipe; + break; + + case Tok_Pipe: /* OK, it was a constructor :) */ + makeTagEntry (&tempTag); + vStringClear (tempIdent); + toDoNext = &typeSpecification; + break; + + default: /* and mean that we're not facing a module name */ + makeTagEntry (&tempTag); + vStringClear (tempIdent); + toDoNext = &tillTokenOrFallback; + comeAfter = &typeSpecification; + waitedToken = Tok_Pipe; + + /* nothing in the context, discard it */ + popStrongContext (); + + /* to be sure we use this token */ + globalScope (ident, what); + } +} + + +/* Parse beginning of type definition + * type 'avar ident = + * or + * type ('var1, 'var2) ident = + */ +static void typeDecl (vString * const ident, ocaToken what) +{ + + switch (what) + { + /* parameterized */ + case Tok_Prime: + comeAfter = &typeDecl; + toDoNext = &ignoreToken; + break; + /* LOTS of parameters */ + case Tok_PARL: + comeAfter = &typeDecl; + waitedToken = Tok_PARR; + toDoNext = &tillToken; + break; + + case OcaIDENTIFIER: + addTag (ident, K_TYPE); + pushStrongContext (ident, ContextType); + requestStrongPoping (); + waitedToken = Tok_EQ; + comeAfter = &typeSpecification; + toDoNext = &tillTokenOrFallback; + break; + + default: + globalScope (ident, what); + } 
+} + +/* Parse type of kind + * type bidule = Ctor1 of ... + * | Ctor2 + * | Ctor3 of ... + * or + * type bidule = | Ctor1 of ... | Ctor2 + * + * when type bidule = { ... } is detected, + * let typeRecord handle it. */ +static void typeSpecification (vString * const ident, ocaToken what) +{ + + switch (what) + { + case OcaIDENTIFIER: + if (isUpperAlpha (ident->buffer[0])) + { + /* here we handle type aliases of type + * type foo = AnotherModule.bar + * AnotherModule can mistakenly be took + * for a constructor. */ + vStringCopy (tempIdent, ident); + prepareTag (&tempTag, tempIdent, K_CONSTRUCTOR); + toDoNext = &constructorValidation; + } + else + { + toDoNext = &tillTokenOrFallback; + comeAfter = &typeSpecification; + waitedToken = Tok_Pipe; + } + break; + + case OcaKEYWORD_and: + toDoNext = &typeDecl; + break; + + case Tok_BRL: /* the '[' & ']' are ignored to accommodate */ + case Tok_BRR: /* with the revised syntax */ + case Tok_Pipe: + /* just ignore it */ + break; + + case Tok_CurlL: + toDoNext = &typeRecord; + break; + + default: /* don't care */ + break; + } +} + + +static boolean dirtySpecialParam = FALSE; + + +/* parse the ~label and ~label:type parameter */ +static void parseLabel (vString * const ident, ocaToken what) +{ + static int parCount = 0; + + switch (what) + { + case OcaIDENTIFIER: + if (!dirtySpecialParam) + { + + if (exportLocalInfo) + addTag (ident, K_VAR); + + dirtySpecialParam = TRUE; + } + break; + + case Tok_PARL: + parCount++; + break; + + case Tok_PARR: + parCount--; + if (parCount == 0) + toDoNext = &letParam; + break; + + case Tok_Op: + if (ident->buffer[0] == ':') + { + toDoNext = &ignoreToken; + comeAfter = &letParam; + } + else if (parCount == 0 && dirtySpecialParam) + { + toDoNext = &letParam; + letParam (ident, what); + } + break; + + default: + if (parCount == 0 && dirtySpecialParam) + { + toDoNext = &letParam; + letParam (ident, what); + } + break; + } +} + + +/* Optional argument with syntax like this : + * ?(foo = value) */ 
+static void parseOptionnal (vString * const ident, ocaToken what) +{ + static int parCount = 0; /* parenthesis nesting depth, kept between calls */ + + + switch (what) + { + case OcaIDENTIFIER: /* only the first identifier is the parameter name */ + if (!dirtySpecialParam) + { + if (exportLocalInfo) + addTag (ident, K_VAR); + + dirtySpecialParam = TRUE; + + if (parCount == 0) /* not inside parentheses: back to letParam right away */ + toDoNext = &letParam; + } + break; + + case Tok_PARL: + parCount++; + break; + + case Tok_PARR: + parCount--; + if (parCount == 0) + toDoNext = &letParam; + break; + + default: /* don't care */ + break; + } +} + + +/** handle let inside functions (as its name + * says : local let). + * Tags are emitted only when exportLocalInfo is set; each binding is + * pushed as a soft context together with mayRedeclare, then letParam + * takes over. */ +static void localLet (vString * const ident, ocaToken what) +{ + switch (what) + { + case Tok_PARL: + /* We ignore this token to be able to parse such + * declarations : + * let (ident : type) = ... + */ + break; + + case OcaKEYWORD_rec: + /* just ignore to be able to parse such declarations: + * let rec ident = ... */ + break; + + case Tok_Op: + /* we are defining a new operator, it's a + * function definition */ + if (exportLocalInfo) + addTag (ident, K_FUNCTION); + + pushSoftContext (mayRedeclare, ident, ContextFunction); + toDoNext = &letParam; + break; + + /* Can be a weird binding, or an '_' */ + case Tok_Val: + if (exportLocalInfo) + addTag (ident, K_VAR); + pushSoftContext (mayRedeclare, ident, ContextValue); + toDoNext = &letParam; + break; + + case OcaIDENTIFIER: + if (exportLocalInfo) + addTag (ident, K_VAR); + pushSoftContext (mayRedeclare, ident, ContextValue); + toDoNext = &letParam; + break; + + case OcaKEYWORD_end: + popStrongContext (); + break; + + default: + toDoNext = &localScope; + break; + } +} + +/* parse : + * | pattern pattern -> ... + * or + * pattern pattern pattern -> ... + * we ignore all identifiers declared in the pattern, + * because their scope is likely to be even more limited + * than the let definitions. + * Used after a match ... with, or a function ... or fun ... + * because their syntax is similar.
*/ +static void matchPattern (vString * const UNUSED (ident), ocaToken what) +{ + switch (what) + { + case Tok_To: /* the arrow: the body of the case follows */ + pushEmptyContext (&matchPattern); + toDoNext = &mayRedeclare; + break; + + + case OcaKEYWORD_in: + popLastNamed (); + break; + + default: /* pattern tokens are ignored */ + break; + } +} + +/* Used at the beginning of a new scope (begin of a + * definition, parenthesis...) to catch inner let + * definition that may be in. + * Any token that is not handled here is passed on to localScope, + * which also becomes the current handler. */ +static void mayRedeclare (vString * const ident, ocaToken what) +{ + switch (what) + { + case OcaKEYWORD_let: + case OcaKEYWORD_val: + toDoNext = localLet; + break; + + case OcaKEYWORD_object: + /* anonymous object: no class name is kept */ + vStringClear (lastClass); + pushContext (ContextStrong, ContextClass, + &localScope, NULL /*voidName */ ); + needStrongPoping = FALSE; + toDoNext = &globalScope; + break; + + case OcaKEYWORD_for: + case OcaKEYWORD_while: + /* wait for the 'do' keyword, then come back here */ + toDoNext = &tillToken; + waitedToken = OcaKEYWORD_do; + comeAfter = &mayRedeclare; + break; + + case OcaKEYWORD_try: + toDoNext = &mayRedeclare; + pushSoftContext (matchPattern, ident, ContextFunction); + break; + + case OcaKEYWORD_fun: + toDoNext = &matchPattern; + break; + + /* Handle the special ;; from the OCaml + * Top level */ + case Tok_semi: + default: + toDoNext = &localScope; + localScope (ident, what); + } +} + +/* parse : + * p1 p2 ... pn = ... + * or + * ?(p1=v) p2 ~p3 ~pn:ja ... = ...
*/ +static void letParam (vString * const ident, ocaToken what) +{ + switch (what) + { + case Tok_EQ: /* end of the parameters, the definition body follows */ + toDoNext = &mayRedeclare; + break; + + case OcaIDENTIFIER: /* plain parameter, tagged only on request */ + if (exportLocalInfo) + addTag (ident, K_VAR); + break; + + case Tok_Op: + /* the first character of the operator tells what follows */ + switch (ident->buffer[0]) + { + case ':': + /*popSoftContext(); */ + /* we got a type signature */ + comeAfter = &mayRedeclare; + toDoNext = &tillTokenOrFallback; + waitedToken = Tok_EQ; + break; + + /* parse something like + * ~varname:type + * or + * ~varname + * or + * ~(varname: long type) */ + case '~': + toDoNext = &parseLabel; + dirtySpecialParam = FALSE; + break; + + /* Optional argument with syntax like this : + * ?(bla = value) + * or + * ?bla */ + case '?': + toDoNext = &parseOptionnal; + dirtySpecialParam = FALSE; + break; + + default: + break; + } + break; + + default: /* don't care */ + break; + } +} + + +/* parse object ... + * used to be sure the class definition is not a type + * alias. + * Only when 'object' follows is lastClass pushed as a strong + * ContextClass context; otherwise the remembered name is cleared. */ +static void classSpecif (vString * const UNUSED (ident), ocaToken what) +{ + switch (what) + { + case OcaKEYWORD_object: + pushStrongContext (lastClass, ContextClass); + toDoNext = &globalScope; + break; + + default: + vStringClear (lastClass); + toDoNext = &globalScope; + } +} + +/* Handle a method ... class declaration. + * nearly a copy/paste of globalLet. + * The method name is tagged as K_METHOD and pushed as a strong + * ContextMethod context before letParam reads its parameters. */ +static void methodDecl (vString * const ident, ocaToken what) +{ + switch (what) + { + case Tok_PARL: + /* We ignore this token to be able to parse such + * declarations : + * let (ident : type) = ... */ + break; + + case OcaKEYWORD_mutable: + case OcaKEYWORD_virtual: + case OcaKEYWORD_rec: + /* just ignore to be able to parse such declarations: + * let rec ident = ...
*/ + break; + + case OcaIDENTIFIER: + addTag (ident, K_METHOD); + /* Normal pushing to get good subs */ + pushStrongContext (ident, ContextMethod); + /*pushSoftContext( globalScope, ident, ContextMethod ); */ + toDoNext = &letParam; + break; + + case OcaKEYWORD_end: + popStrongContext (); + break; + + default: + toDoNext = &globalScope; + break; + } +} + +/* name of the last module, used for + * context stacking: moduleDecl stores it, moduleSpecif pushes or + * clears it. */ +vString *lastModule; + + +/* parse + * ... struct (* new global scope *) end + * or + * ... sig (* new global scope *) end + * or + * functor ... -> moduleSpecif + * + * Any other token clears lastModule and goes back to globalScope + * without opening a module context. + */ +static void moduleSpecif (vString * const ident, ocaToken what) +{ + switch (what) + { + case OcaKEYWORD_functor: + /* wait for the arrow, then look at the functor body here again */ + toDoNext = &contextualTillToken; + waitedToken = Tok_To; + comeAfter = &moduleSpecif; + break; + + case OcaKEYWORD_struct: + case OcaKEYWORD_sig: + pushStrongContext (lastModule, ContextModule); + toDoNext = &globalScope; + break; + + case Tok_PARL: /* ( */ + toDoNext = &contextualTillToken; + comeAfter = &globalScope; + waitedToken = Tok_PARR; + /* the opening parenthesis itself is given to contextualTillToken */ + contextualTillToken (ident, what); + break; + + default: + vStringClear (lastModule); + toDoNext = &globalScope; + } +} + +/* parse : + * module name = ... + * then pass the token stream to moduleSpecif + * The name is tagged as K_MODULE and remembered in lastModule. */ +static void moduleDecl (vString * const ident, ocaToken what) +{ + switch (what) + { + case OcaKEYWORD_type: + /* just ignore it, the name comes after */ + break; + + case OcaIDENTIFIER: + addTag (ident, K_MODULE); + vStringCopy (lastModule, ident); + waitedToken = Tok_EQ; + comeAfter = &moduleSpecif; + toDoNext = &contextualTillToken; + break; + + default: /* don't care */ + break; + } +} + +/* parse : + * class name = ... + * or + * class virtual ['a,'b] classname = ...
*/
+static void classDecl (vString * const ident, ocaToken what)
+{
+    switch (what)
+    {
+    case OcaIDENTIFIER:
+        /* the class name: tag it and remember it so that classSpecif
+         * can open the class context once the '=' has been passed */
+        addTag (ident, K_CLASS);
+        vStringCopy (lastClass, ident);
+        toDoNext = &contextualTillToken;
+        waitedToken = Tok_EQ;
+        comeAfter = &classSpecif;
+        break;
+
+    case Tok_BRL:
+        /* type parameters: wait for Tok_BRR, then come back here
+         * to read the class name */
+        toDoNext = &tillToken;
+        waitedToken = Tok_BRR;
+        comeAfter = &classDecl;
+        break;
+
+    default:
+        /* 'virtual' and friends are ignored */
+        break;
+    }
+}
+
+/* Handle a global
+ * let ident ...
+ * or
+ * let rec ident ...
+ *
+ * The bound name is always tagged (K_FUNCTION for an operator, K_VAR
+ * otherwise) and pushed as a strong context before letParam reads the
+ * parameters. */
+static void globalLet (vString * const ident, ocaToken what)
+{
+    switch (what)
+    {
+    case Tok_PARL:
+        /* We ignore this token to be able to parse such
+         * declarations :
+         * let (ident : type) = ...
+         */
+        break;
+
+    case OcaKEYWORD_mutable:
+    case OcaKEYWORD_virtual:
+    case OcaKEYWORD_rec:
+        /* just ignore to be able to parse such declarations:
+         * let rec ident = ... */
+        break;
+
+    case Tok_Op:
+        /* we are defining a new operator, it's a
+         * function definition */
+        addTag (ident, K_FUNCTION);
+        pushStrongContext (ident, ContextFunction);
+        toDoNext = &letParam;
+        break;
+
+    case OcaIDENTIFIER:
+        addTag (ident, K_VAR);
+        pushStrongContext (ident, ContextValue);
+        requestStrongPoping ();
+        toDoNext = &letParam;
+        break;
+
+    case OcaKEYWORD_end:
+        popStrongContext ();
+        break;
+
+    default:
+        toDoNext = &globalScope;
+        break;
+    }
+}
+
+/* Handle the "strong" top levels, all 'big' declarations
+ * happen here.
+ * Each declaration keyword selects the handler for the following
+ * tokens; the handler is also remembered so that a later 'and' can
+ * continue the same kind of declaration. */
+static void globalScope (vString * const UNUSED (ident), ocaToken what)
+{
+    /* Do not touch, this is used only by the global scope
+     * to handle an 'and' */
+    static parseNext previousParser = NULL;
+
+    switch (what)
+    {
+    case OcaKEYWORD_and:
+        cleanupPreviousParser ();
+        /* No declaration to continue (start of input, or right after
+         * an exception): keep the current handler. Installing NULL
+         * here would make the main loop call through a NULL function
+         * pointer on the next token. */
+        if (previousParser != NULL)
+            toDoNext = previousParser;
+        break;
+
+    case OcaKEYWORD_type:
+        cleanupPreviousParser ();
+        toDoNext = &typeDecl;
+        previousParser = &typeDecl;
+        break;
+
+    case OcaKEYWORD_class:
+        cleanupPreviousParser ();
+        toDoNext = &classDecl;
+        previousParser = &classDecl;
+        break;
+
+    case OcaKEYWORD_module:
+        cleanupPreviousParser ();
+        toDoNext = &moduleDecl;
+        previousParser = &moduleDecl;
+        break;
+
+    case OcaKEYWORD_end:
+        needStrongPoping = FALSE;
+        killCurrentState ();
+        /*popStrongContext(); */
+        break;
+
+    case OcaKEYWORD_method:
+        cleanupPreviousParser ();
+        toDoNext = &methodDecl;
+        /* and is not allowed in methods */
+        break;
+
+        /* val is mixed with let as global
+         * to be able to handle mli & new syntax */
+    case OcaKEYWORD_val:
+    case OcaKEYWORD_let:
+        cleanupPreviousParser ();
+        toDoNext = &globalLet;
+        previousParser = &globalLet;
+        break;
+
+    case OcaKEYWORD_exception:
+        cleanupPreviousParser ();
+        toDoNext = &exceptionDecl;
+        previousParser = NULL;
+        break;
+
+        /* must be a #line directive, discard the
+         * whole line. */
+    case Tok_Sharp:
+        /* ignore */
+        break;
+
+    default:
+        /* we don't care */
+        break;
+    }
+}
+
+/* Parse expression. Well ignore it is more the case,
+ * ignore all tokens except "shocking" keywords.
+ * Tokens not listed below are given to globalScope after a strong pop
+ * has been requested. */
+static void localScope (vString * const ident, ocaToken what)
+{
+    switch (what)
+    {
+    case Tok_Pipe:
+    case Tok_PARR:
+    case Tok_BRR:
+    case Tok_CurlR:
+        popSoftContext ();
+        break;
+
+        /* Everything that `begin` has an `end`
+         * as end is overloaded and signal many end
+         * of things, we add an empty strong context to
+         * avoid problem with the end.
+         */
+    case OcaKEYWORD_begin:
+        pushContext (ContextStrong, ContextBlock, &mayRedeclare, NULL);
+        toDoNext = &mayRedeclare;
+        break;
+
+    case OcaKEYWORD_in:
+        popLastNamed ();
+        break;
+
+        /* Ok, we got a '{', which is much likely to create
+         * a record. We cannot treat it like other [ && (,
+         * because it may contain the 'with' keyword and screw
+         * everything else. */
+    case Tok_CurlL:
+        toDoNext = &contextualTillToken;
+        waitedToken = Tok_CurlR;
+        comeAfter = &localScope;
+        contextualTillToken (ident, what);
+        break;
+
+        /* Yeah imperative feature of OCaml,
+         * a ';' like in C */
+    case Tok_semi:
+        toDoNext = &mayRedeclare;
+        break;
+
+    case Tok_PARL:
+    case Tok_BRL:
+        pushEmptyContext (&localScope);
+        toDoNext = &mayRedeclare;
+        break;
+
+    case OcaKEYWORD_and:
+        popLastNamed ();
+        toDoNext = &localLet;
+        break;
+
+    case OcaKEYWORD_else:
+    case OcaKEYWORD_then:
+        popSoftContext ();
+        pushEmptyContext (&localScope);
+        toDoNext = &mayRedeclare;
+        break;
+
+    case OcaKEYWORD_if:
+        pushEmptyContext (&localScope);
+        toDoNext = &mayRedeclare;
+        break;
+
+    case OcaKEYWORD_match:
+        pushEmptyContext (&localScope);
+        toDoNext = &mayRedeclare;
+        break;
+
+    case OcaKEYWORD_with:
+        popSoftContext ();
+        toDoNext = &matchPattern;
+        pushEmptyContext (&matchPattern);
+        break;
+
+    case OcaKEYWORD_end:
+        killCurrentState ();
+        break;
+
+
+    case OcaKEYWORD_fun:
+        comeAfter = &mayRedeclare;
+        toDoNext = &tillToken;
+        waitedToken = Tok_To;
+        break;
+
+    case OcaKEYWORD_done:
+    case OcaKEYWORD_val:
+        /* doesn't care */
+        break;
+
+    default:
+        requestStrongPoping ();
+        globalScope (ident, what);
+        break;
+    }
+}
+
+/*////////////////////////////////////////////////////////////////
+//// Deal with the system */
+/* in OCaml the file name is the module name used in the language
+ * with it first letter put in upper case.
+ *
+ * Emits one K_MODULE tag named after the source file: directory part
+ * and extension removed, first letter upper-cased. Nothing is emitted
+ * when no name is left (empty file name, or a name such as ".ml"). */
+static void computeModuleName ( void )
+{
+    /* in Ocaml the file name define a module.
+     * so we define a module =)
+     */
+    const char *filename = getSourceFileName ();
+    const char *base = filename;
+    const char *cursor;
+    const char *dot;
+    size_t length;
+    vString *moduleName;
+
+    /* avoid problem with path in front of filename: keep what follows
+     * the last separator, including one at the very first position */
+    for (cursor = filename; *cursor != '\0'; ++cursor)
+    {
+        if (*cursor == '\\' || *cursor == '/')
+            base = cursor + 1;
+    }
+
+    /* the extension is looked for in the base name only, so a '.' in a
+     * directory name is never mistaken for it */
+    dot = strrchr (base, '.');
+    length = (dot != NULL) ? (size_t) (dot - base) : strlen (base);
+    if (length == 0)
+        return;
+
+    moduleName = vStringNew ();
+    vStringNCopyS (moduleName, base, length);
+    vStringTerminate (moduleName);
+
+    if (isLowerAlpha (moduleName->buffer[0]))
+        moduleName->buffer[0] += ('A' - 'a');
+
+    makeSimpleTag (moduleName, OcamlKinds, K_MODULE);
+    vStringDelete (moduleName);
+}
+
+/* Allocate all string of the context stack */
+static void initStack ( void )
+{
+    int i;
+    for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i)
+        stack[i].contextName = vStringNew ();
+}
+
+/* Release the strings allocated by initStack */
+static void clearStack ( void )
+{
+    int i;
+    for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i)
+        vStringDelete (stack[i].contextName);
+}
+
+/* Parser entry point: emits the module tag of the file, then feeds
+ * every token returned by lex() to the current handler (toDoNext),
+ * starting with globalScope. All strings allocated here are released
+ * before returning. */
+static void findOcamlTags (void)
+{
+    lexingState st;
+    ocaToken tok;
+
+    computeModuleName ();
+    initStack ();
+    tempIdent = vStringNew ();
+    lastModule = vStringNew ();
+    lastClass = vStringNew ();
+    voidName = vStringNew ();
+    vStringCopyS (voidName, "_");
+
+    st.name = vStringNew ();
+    st.cp = fileReadLine ();
+    toDoNext = &globalScope;
+    tok = lex (&st);
+    while (tok != Tok_EOF)
+    {
+        (*toDoNext) (st.name, tok);
+        tok = lex (&st);
+    }
+
+    /* the token buffer belongs to this function */
+    vStringDelete (st.name);
+    vStringDelete (voidName);
+    vStringDelete (tempIdent);
+    vStringDelete (lastModule);
+    vStringDelete (lastClass);
+    clearStack ();
+}
+
+/* Called once with the language id given to this parser: records it
+ * and builds the operator and keyword tables. */
+static void ocamlInitialize (const langType language)
+{
+    Lang_Ocaml = language;
+
+    initOperatorTable ();
+    initKeywordHash ();
+}
+
+/* Describe the OCaml parser: kinds, the "ml" and "mli" extensions and
+ * the two callbacks above. */
+extern parserDefinition *OcamlParser (void)
+{
+    static const char *const extensions[] = { "ml", "mli", NULL };
+    parserDefinition *def = parserNew ("OCaml");
+    def->kinds = OcamlKinds;
+    def->kindCount = KIND_COUNT (OcamlKinds);
+    def->extensions = extensions;
+    def->parser = findOcamlTags;
+    def->initialize = ocamlInitialize;
+
+    return def;
+}
+diff
--git a/options.c b/options.c new file mode 100644 index 0000000..d26627f --- /dev/null +++ b/options.c @@ -0,0 +1,1829 @@ +/* +* $Id: options.c 576 2007-06-30 04:16:23Z elliotth $ +* +* Copyright (c) 1996-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions to process command line options. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> /* to declare isspace () */ + +#include "ctags.h" +#include "debug.h" +#include "main.h" +#define OPTION_WRITE +#include "options.h" +#include "parse.h" +#include "routines.h" + +/* +* MACROS +*/ +#define INVOCATION "Usage: %s [options] [file(s)]\n" + +#define CTAGS_ENVIRONMENT "CTAGS" +#define ETAGS_ENVIRONMENT "ETAGS" + +#define CTAGS_FILE "tags" +#define ETAGS_FILE "TAGS" + +#ifndef ETAGS +# define ETAGS "etags" /* name which causes default use of -e */ +#endif + +/* The following separators are permitted for list options. + */ +#define EXTENSION_SEPARATOR '.'
+#define PATTERN_START '(' +#define PATTERN_STOP ')' +#define IGNORE_SEPARATORS ", \t\n" + +#ifndef DEFAULT_FILE_FORMAT +# define DEFAULT_FILE_FORMAT 2 +#endif + +#if defined (HAVE_OPENDIR) || defined (HAVE_FINDFIRST) || defined (HAVE__FINDFIRST) || defined (AMIGA) +# define RECURSE_SUPPORTED +#endif + +#define isCompoundOption(c) (boolean) (strchr ("fohiILpDb", (c)) != NULL) + +/* +* Data declarations +*/ + +enum eOptionLimits { + MaxHeaderExtensions = 100, /* maximum number of extensions in -h option */ + MaxSupportedTagFormat = 2 +}; + +typedef struct sOptionDescription { + int usedByEtags; + const char *description; +} optionDescription; + +typedef void (*parametricOptionHandler) (const char *const option, const char *const parameter); + +typedef const struct { + const char* name; /* name of option as specified by user */ + parametricOptionHandler handler; /* routine to handle option */ + boolean initOnly; /* option must be specified before any files */ +} parametricOption; + +typedef const struct { + const char* name; /* name of option as specified by user */ + boolean* pValue; /* pointer to option value */ + boolean initOnly; /* option must be specified before any files */ +} booleanOption; + +/* +* DATA DEFINITIONS +*/ + +static boolean NonOptionEncountered; +static stringList *OptionFiles; +static stringList* Excluded; +static boolean FilesRequired = TRUE; +static boolean SkipConfiguration; + +static const char *const HeaderExtensions [] = { + "h", "H", "hh", "hpp", "hxx", "h++", "inc", "def", NULL +}; + +optionValues Option = { + { + FALSE, /* --extra=f */ + FALSE, /* --extra=q */ + TRUE, /* --file-scope */ + }, + { + FALSE, /* -fields=a */ + TRUE, /* -fields=f */ + FALSE, /* -fields=m */ + FALSE, /* -fields=i */ + TRUE, /* -fields=k */ + FALSE, /* -fields=z */ + FALSE, /* -fields=K */ + FALSE, /* -fields=l */ + FALSE, /* -fields=n */ + TRUE, /* -fields=s */ + FALSE, /* -fields=S */ + TRUE /* -fields=t */ + }, + NULL, /* -I */ + FALSE, /* -a */ + FALSE, /* 
-B */ + FALSE, /* -e */ +#ifdef MACROS_USE_PATTERNS + EX_PATTERN, /* -n, --excmd */ +#else + EX_MIX, /* -n, --excmd */ +#endif + FALSE, /* -R */ + SO_SORTED, /* -u, --sort */ + FALSE, /* -V */ + FALSE, /* -x */ + NULL, /* -L */ + NULL, /* -o */ + NULL, /* -h */ + NULL, /* --etags-include */ + DEFAULT_FILE_FORMAT,/* --format */ + FALSE, /* --if0 */ + FALSE, /* --kind-long */ + LANG_AUTO, /* --lang */ + TRUE, /* --links */ + FALSE, /* --filter */ + NULL, /* --filter-terminator */ + FALSE, /* --tag-relative */ + FALSE, /* --totals */ + FALSE, /* --line-directives */ +#ifdef DEBUG + 0, 0 /* -D, -b */ +#endif +}; + +/* +- Locally used only +*/ + +static optionDescription LongOptionDescription [] = { + {1," -a Append the tags to an existing tag file."}, +#ifdef DEBUG + {1," -b "}, + {1," Set break line."}, +#endif + {0," -B Use backward searching patterns (?...?)."}, +#ifdef DEBUG + {1," -D "}, + {1," Set debug level."}, +#endif + {0," -e Output tag file for use with Emacs."}, + {1," -f "}, + {1," Write tags to specified file. 
Value of \"-\" writes tags to stdout"}, + {1," [\"tags\"; or \"TAGS\" when -e supplied]."}, + {0," -F Use forward searching patterns (/.../) (default)."}, + {1," -h "}, + {1," Specify list of file extensions to be treated as include files."}, + {1," [\".h.H.hh.hpp.hxx.h++\"]."}, + {1," -I "}, + {1," A list of tokens to be specially handled is read from either the"}, + {1," command line or the specified file."}, + {1," -L "}, + {1," A list of source file names are read from the specified file."}, + {1," If specified as \"-\", then standard input is read."}, + {0," -n Equivalent to --excmd=number."}, + {0," -N Equivalent to --excmd=pattern."}, + {1," -o Alternative for -f."}, +#ifdef RECURSE_SUPPORTED + {1," -R Equivalent to --recurse."}, +#else + {1," -R Not supported on this platform."}, +#endif + {0," -u Equivalent to --sort=no."}, + {1," -V Equivalent to --verbose."}, + {1," -x Print a tabular cross reference file to standard output."}, + {1," --append=[yes|no]"}, + {1," Should tags should be appended to existing tag file [no]?"}, + {1," --etags-include=file"}, + {1," Include reference to 'file' in Emacs-style tag file (requires -e)."}, + {1," --exclude=pattern"}, + {1," Exclude files and directories matching 'pattern'."}, + {0," --excmd=number|pattern|mix"}, +#ifdef MACROS_USE_PATTERNS + {0," Uses the specified type of EX command to locate tags [pattern]."}, +#else + {0," Uses the specified type of EX command to locate tags [mix]."}, +#endif + {1," --extra=[+|-]flags"}, + {1," Include extra tag entries for selected information (flags: \"fq\")."}, + {1," --fields=[+|-]flags"}, + {1," Include selected extension fields (flags: \"afmikKlnsStz\") [fks]."}, + {1," --file-scope=[yes|no]"}, + {1," Should tags scoped only for a single file (e.g. 
\"static\" tags"}, + {1," be included in the output [yes]?"}, + {1," --filter=[yes|no]"}, + {1," Behave as a filter, reading file names from standard input and"}, + {1," writing tags to standard output [no]."}, + {1," --filter-terminator=string"}, + {1," Specify string to print to stdout following the tags for each file"}, + {1," parsed when --filter is enabled."}, + {0," --format=level"}, +#if DEFAULT_FILE_FORMAT == 1 + {0," Force output of specified tag file format [1]."}, +#else + {0," Force output of specified tag file format [2]."}, +#endif + {1," --help"}, + {1," Print this option summary."}, + {1," --if0=[yes|no]"}, + {1," Should C code within #if 0 conditional branches be parsed [no]?"}, + {1," ---kinds=[+|-]kinds"}, + {1," Enable/disable tag kinds for language ."}, + {1," --langdef=name"}, + {1," Define a new language to be parsed with regular expressions."}, + {1," --langmap=map(s)"}, + {1," Override default mapping of language to source file extension."}, + {1," --language-force=language"}, + {1," Force all files to be interpreted using specified language."}, + {1," --languages=[+|-]list"}, + {1," Restrict files scanned for tags to those mapped to langauges"}, + {1," specified in the comma-separated 'list'. 
The list can contain any"}, + {1," built-in or user-defined language [all]."}, + {1," --license"}, + {1," Print details of software license."}, + {0," --line-directives=[yes|no]"}, + {0," Should #line directives be processed [no]?"}, + {1," --links=[yes|no]"}, + {1," Indicate whether symbolic links should be followed [yes]."}, + {1," --list-kinds=[language|all]"}, + {1," Output a list of all tag kinds for specified language or all."}, + {1," --list-languages"}, + {1," Output list of supported languages."}, + {1," --list-maps=[language|all]"}, + {1," Output list of language mappings."}, + {1," --options=file"}, + {1," Specify file from which command line options should be read."}, + {1," --recurse=[yes|no]"}, +#ifdef RECURSE_SUPPORTED + {1," Recurse into directories supplied on command line [no]."}, +#else + {1," Not supported on this platform."}, +#endif +#ifdef HAVE_REGEX + {1," --regex-=/line_pattern/name_pattern/[flags]"}, + {1," Define regular expression for locating tags in specific language."}, +#endif + {0," --sort=[yes|no|foldcase]"}, + {0," Should tags be sorted (optionally ignoring case) [yes]?."}, + {0," --tag-relative=[yes|no]"}, + {0," Should paths be relative to location of tag file [no; yes when -e]?"}, + {1," --totals=[yes|no]"}, + {1," Print statistics about source and tag files [no]."}, + {1," --verbose=[yes|no]"}, + {1," Enable verbose messages describing actions on each source file."}, + {1," --version"}, + {1," Print version identifier to standard output."}, + {1, NULL} +}; + +static const char* const License1 = +"This program is free software; you can redistribute it and/or\n" +"modify it under the terms of the GNU General Public License\n" +"as published by the Free Software Foundation; either version 2\n" +"of the License, or (at your option) any later version.\n" +"\n"; +static const char* const License2 = +"This program is distributed in the hope that it will be useful,\n" +"but WITHOUT ANY WARRANTY; without even the implied warranty of\n" 
+"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" +"GNU General Public License for more details.\n" +"\n" +"You should have received a copy of the GNU General Public License\n" +"along with this program; if not, write to the Free Software\n" +"Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n"; + +/* Contains a set of strings describing the set of "features" compiled into + * the code. + */ +static const char *const Features [] = { +#ifdef WIN32 + "win32", +#endif +#ifdef DJGPP + "msdos_32", +#else +# ifdef MSDOS + "msdos_16", +# endif +#endif +#ifdef OS2 + "os2", +#endif +#ifdef AMIGA + "amiga", +#endif +#ifdef VMS + "vms", +#endif +#ifdef HAVE_FNMATCH + "wildcards", +#endif +#ifdef HAVE_REGEX + "regex", +#endif +#ifndef EXTERNAL_SORT + "internal-sort", +#endif +#ifdef CUSTOM_CONFIGURATION_FILE + "custom-conf", +#endif +#if (defined (MSDOS) || defined (WIN32) || defined (OS2)) && defined (UNIX_PATH_SEPARATOR) + "unix-path-separator", +#endif +#ifdef DEBUG + "debug", +#endif + NULL +}; + +/* +* FUNCTION PROTOTYPES +*/ +static boolean parseFileOptions (const char *const fileName); + +/* +* FUNCTION DEFINITIONS +*/ + +extern void verbose (const char *const format, ...) 
+{ + if (Option.verbose) + { + va_list ap; + va_start (ap, format); + vprintf (format, ap); + va_end (ap); + } +} + +static char *stringCopy (const char *const string) +{ + char* result = NULL; + if (string != NULL) + result = eStrdup (string); + return result; +} + +static void freeString (char **const pString) +{ + if (*pString != NULL) + { + eFree (*pString); + *pString = NULL; + } +} + +extern void freeList (stringList** const pList) +{ + if (*pList != NULL) + { + stringListDelete (*pList); + *pList = NULL; + } +} + +extern void setDefaultTagFileName (void) +{ + if (Option.tagFileName != NULL) + ; /* accept given name */ + else if (Option.etags) + Option.tagFileName = stringCopy (ETAGS_FILE); + else + Option.tagFileName = stringCopy (CTAGS_FILE); +} + +extern boolean filesRequired (void) +{ + boolean result = FilesRequired; + if (Option.recurse) + result = FALSE; + return result; +} + +extern void checkOptions (void) +{ + const char* notice; + if (Option.xref) + { + notice = "xref output"; + if (Option.include.fileNames) + { + error (WARNING, "%s disables file name tags", notice); + Option.include.fileNames = FALSE; + } + } + if (Option.append) + { + notice = "append mode is not compatible with"; + if (isDestinationStdout ()) + error (FATAL, "%s tags to stdout", notice); + } + if (Option.filter) + { + notice = "filter mode"; + if (Option.printTotals) + { + error (WARNING, "%s disables totals", notice); + Option.printTotals = FALSE; + } + if (Option.tagFileName != NULL) + error (WARNING, "%s ignores output tag file name", notice); + } +} + +static void setEtagsMode (void) +{ + Option.etags = TRUE; + Option.sorted = SO_UNSORTED; + Option.lineDirectives = FALSE; + Option.tagRelative = TRUE; +} + +extern void testEtagsInvocation (void) +{ + char* const execName = eStrdup (getExecutableName ()); + char* const etags = eStrdup (ETAGS); +#ifdef CASE_INSENSITIVE_FILENAMES + toLowerString (execName); + toLowerString (etags); +#endif + if (strstr (execName, etags) != 
NULL) + { + verbose ("Running in etags mode\n"); + setEtagsMode (); + } + eFree (execName); + eFree (etags); +} + +/* + * Cooked argument parsing + */ + +static void parseShortOption (cookedArgs *const args) +{ + args->simple [0] = *args->shortOptions++; + args->simple [1] = '\0'; + args->item = args->simple; + if (! isCompoundOption (*args->simple)) + args->parameter = ""; + else if (*args->shortOptions == '\0') + { + argForth (args->args); + if (argOff (args->args)) + args->parameter = NULL; + else + args->parameter = argItem (args->args); + args->shortOptions = NULL; + } + else + { + args->parameter = args->shortOptions; + args->shortOptions = NULL; + } +} + +static void parseLongOption (cookedArgs *const args, const char *item) +{ + const char* const equal = strchr (item, '='); + if (equal == NULL) + { + args->item = eStrdup (item); /* FIXME: memory leak. */ + args->parameter = ""; + } + else + { + const size_t length = equal - item; + args->item = xMalloc (length + 1, char); /* FIXME: memory leak. */ + strncpy (args->item, item, length); + args->item [length] = '\0'; + args->parameter = equal + 1; + } + Assert (args->item != NULL); + Assert (args->parameter != NULL); +} + +static void cArgRead (cookedArgs *const current) +{ + char* item; + + Assert (current != NULL); + if (! 
argOff (current->args)) + { + item = argItem (current->args); + current->shortOptions = NULL; + Assert (item != NULL); + if (strncmp (item, "--", (size_t) 2) == 0) + { + current->isOption = TRUE; + current->longOption = TRUE; + parseLongOption (current, item + 2); + Assert (current->item != NULL); + Assert (current->parameter != NULL); + } + else if (*item == '-') + { + current->isOption = TRUE; + current->longOption = FALSE; + current->shortOptions = item + 1; + parseShortOption (current); + } + else + { + current->isOption = FALSE; + current->longOption = FALSE; + current->item = item; + current->parameter = NULL; + } + } +} + +extern cookedArgs* cArgNewFromString (const char* string) +{ + cookedArgs* const result = xMalloc (1, cookedArgs); + memset (result, 0, sizeof (cookedArgs)); + result->args = argNewFromString (string); + cArgRead (result); + return result; +} + +extern cookedArgs* cArgNewFromArgv (char* const* const argv) +{ + cookedArgs* const result = xMalloc (1, cookedArgs); + memset (result, 0, sizeof (cookedArgs)); + result->args = argNewFromArgv (argv); + cArgRead (result); + return result; +} + +extern cookedArgs* cArgNewFromFile (FILE* const fp) +{ + cookedArgs* const result = xMalloc (1, cookedArgs); + memset (result, 0, sizeof (cookedArgs)); + result->args = argNewFromFile (fp); + cArgRead (result); + return result; +} + +extern cookedArgs* cArgNewFromLineFile (FILE* const fp) +{ + cookedArgs* const result = xMalloc (1, cookedArgs); + memset (result, 0, sizeof (cookedArgs)); + result->args = argNewFromLineFile (fp); + cArgRead (result); + return result; +} + +extern void cArgDelete (cookedArgs* const current) +{ + Assert (current != NULL); + argDelete (current->args); + memset (current, 0, sizeof (cookedArgs)); + eFree (current); +} + +static boolean cArgOptionPending (cookedArgs* const current) +{ + boolean result = FALSE; + if (current->shortOptions != NULL) + if (*current->shortOptions != '\0') + result = TRUE; + return result; +} + +extern 
boolean cArgOff (cookedArgs* const current) +{ + Assert (current != NULL); + return (boolean) (argOff (current->args) && ! cArgOptionPending (current)); +} + +extern boolean cArgIsOption (cookedArgs* const current) +{ + Assert (current != NULL); + return current->isOption; +} + +extern const char* cArgItem (cookedArgs* const current) +{ + Assert (current != NULL); + return current->item; +} + +extern void cArgForth (cookedArgs* const current) +{ + Assert (current != NULL); + Assert (! cArgOff (current)); + if (cArgOptionPending (current)) + parseShortOption (current); + else + { + Assert (! argOff (current->args)); + argForth (current->args); + if (! argOff (current->args)) + cArgRead (current); + else + { + current->isOption = FALSE; + current->longOption = FALSE; + current->shortOptions = NULL; + current->item = NULL; + current->parameter = NULL; + } + } +} + +/* + * File extension and language mapping + */ + +static void addExtensionList ( + stringList *const slist, const char *const elist, const boolean clear) +{ + char *const extensionList = eStrdup (elist); + const char *extension = NULL; + boolean first = TRUE; + + if (clear) + { + verbose (" clearing\n"); + stringListClear (slist); + } + verbose (" adding: "); + if (elist != NULL && *elist != '\0') + { + extension = extensionList; + if (elist [0] == EXTENSION_SEPARATOR) + ++extension; + } + while (extension != NULL) + { + char *separator = strchr (extension, EXTENSION_SEPARATOR); + if (separator != NULL) + *separator = '\0'; + verbose ("%s%s", first ? "" : ", ", + *extension == '\0' ? 
"(NONE)" : extension); + stringListAdd (slist, vStringNewInit (extension)); + first = FALSE; + if (separator == NULL) + extension = NULL; + else + extension = separator + 1; + } + if (Option.verbose) + { + printf ("\n now: "); + stringListPrint (slist); + putchar ('\n'); + } + eFree (extensionList); +} + +static boolean isFalse (const char *parameter) +{ + return (boolean) ( + strcasecmp (parameter, "0" ) == 0 || + strcasecmp (parameter, "n" ) == 0 || + strcasecmp (parameter, "no" ) == 0 || + strcasecmp (parameter, "off") == 0); +} + +static boolean isTrue (const char *parameter) +{ + return (boolean) ( + strcasecmp (parameter, "1" ) == 0 || + strcasecmp (parameter, "y" ) == 0 || + strcasecmp (parameter, "yes") == 0 || + strcasecmp (parameter, "on" ) == 0); +} + +/* Determines whether the specified file name is considered to be a header + * file for the purposes of determining whether enclosed tags are global or + * static. + */ +extern boolean isIncludeFile (const char *const fileName) +{ + boolean result = FALSE; + const char *const extension = fileExtension (fileName); + if (Option.headerExt != NULL) + result = stringListExtensionMatched (Option.headerExt, extension); + return result; +} + +/* + * Specific option processing + */ + +static void processEtagsInclude ( + const char *const option, const char *const parameter) +{ + if (! 
Option.etags) + error (FATAL, "Etags must be enabled to use \"%s\" option", option); + else + { + vString *const file = vStringNewInit (parameter); + if (Option.etagsInclude == NULL) + Option.etagsInclude = stringListNew (); + stringListAdd (Option.etagsInclude, file); + FilesRequired = FALSE; + } +} + +static void processExcludeOption ( + const char *const option __unused__, const char *const parameter) +{ + const char *const fileName = parameter + 1; + if (parameter [0] == '\0') + freeList (&Excluded); + else if (parameter [0] == '@') + { + stringList* const sl = stringListNewFromFile (fileName); + if (sl == NULL) + error (FATAL | PERROR, "cannot open \"%s\"", fileName); + if (Excluded == NULL) + Excluded = sl; + else + stringListCombine (Excluded, sl); + verbose (" adding exclude patterns from %s\n", fileName); + } + else + { + vString *const item = vStringNewInit (parameter); + if (Excluded == NULL) + Excluded = stringListNew (); + stringListAdd (Excluded, item); + verbose (" adding exclude pattern: %s\n", parameter); + } +} + +extern boolean isExcludedFile (const char* const name) +{ + const char* base = baseFilename (name); + boolean result = FALSE; + if (Excluded != NULL) + { + result = stringListFileMatched (Excluded, base); + if (! result && name != base) + result = stringListFileMatched (Excluded, name); + } +#ifdef AMIGA + /* not a good solution, but the only one which works often */ + if (! 
result)
+		result = (boolean) (strcmp (name, TagFile.name) == 0);
+#endif
+	return result;
+}
+
+/* Handles --excmd: selects Option.locate from the first character of the
+ * parameter ('m', 'n' or 'p'). Any other value is a fatal error.
+ */
+static void processExcmdOption (
+		const char *const option, const char *const parameter)
+{
+	switch (*parameter)
+	{
+		case 'm': Option.locate = EX_MIX; break;
+		case 'n': Option.locate = EX_LINENUM; break;
+		case 'p': Option.locate = EX_PATTERN; break;
+		default:
+			error (FATAL, "Invalid value for \"%s\" option", option);
+			break;
+	}
+}
+
+/* Handles --extra: each flag letter is switched on or off according to the
+ * most recent '+' or '-'. When the parameter does not begin with '+' or '-',
+ * the flags are cleared first so that the listed letters replace the current
+ * selection. Unknown letters only produce a warning.
+ */
+static void processExtraTagsOption (
+		const char *const option, const char *const parameter)
+{
+	struct sInclude *const inc = &Option.include;
+	const char *p = parameter;
+	boolean mode = TRUE;
+	int c;
+
+	if (*p != '+' && *p != '-')
+	{
+		inc->fileNames = FALSE;
+		inc->qualifiedTags = FALSE;
+#if 0
+		inc->fileScope = FALSE;
+#endif
+	}
+	while ((c = *p++) != '\0') switch (c)
+	{
+		case '+': mode = TRUE; break;
+		case '-': mode = FALSE; break;
+
+		case 'f': inc->fileNames = mode; break;
+		case 'q': inc->qualifiedTags = mode; break;
+#if 0
+		case 'F': inc->fileScope = mode; break;
+#endif
+
+		default: error(WARNING, "Unsupported parameter '%c' for \"%s\" option",
+				c, option);
+			break;
+	}
+}
+
+/* Handles --fields: each flag letter switches one member of
+ * Option.extensionFields on or off according to the most recent '+' or '-'.
+ * When the parameter does not begin with '+' or '-', the listed letters
+ * replace the current selection, so every flag that has a letter below is
+ * cleared first. Unknown letters only produce a warning.
+ */
+static void processFieldsOption (
+		const char *const option, const char *const parameter)
+{
+	struct sExtFields *field = &Option.extensionFields;
+	const char *p = parameter;
+	boolean mode = TRUE;
+	int c;
+
+	if (*p != '+' && *p != '-')
+	{
+		field->access = FALSE;
+		field->fileScope = FALSE;
+		field->implementation = FALSE;
+		field->inheritance = FALSE;
+		field->kind = FALSE;
+		field->kindKey = FALSE;
+		field->kindLong = FALSE;
+		field->language = FALSE;
+		/* 'n' and 'S' are settable below, so they must be reset as well;
+		 * otherwise an earlier "--fields=+nS" survives a later replacement.
+		 */
+		field->lineNumber = FALSE;
+		field->scope = FALSE;
+		field->signature = FALSE;
+		field->typeRef = FALSE;
+	}
+	while ((c = *p++) != '\0') switch (c)
+	{
+		case '+': mode = TRUE; break;
+		case '-': mode = FALSE; break;
+
+		case 'a': field->access = mode; break;
+		case 'f': field->fileScope = mode; break;
+		case 'm': field->implementation = mode; break;
+		case 'i': field->inheritance = mode; break;
+		case 'k': field->kind = mode; break;
+		
case 'K': field->kindLong = mode; break; + case 'l': field->language = mode; break; + case 'n': field->lineNumber = mode; break; + case 's': field->scope = mode; break; + case 'S': field->signature = mode; break; + case 'z': field->kindKey = mode; break; + case 't': field->typeRef = mode; break; + + default: error(WARNING, "Unsupported parameter '%c' for \"%s\" option", + c, option); + break; + } +} + +static void processFilterTerminatorOption ( + const char *const option __unused__, const char *const parameter) +{ + freeString (&Option.filterTerminator); + Option.filterTerminator = stringCopy (parameter); +} + +static void processFormatOption ( + const char *const option, const char *const parameter) +{ + unsigned int format; + + if (sscanf (parameter, "%u", &format) < 1) + error (FATAL, "Invalid value for \"%s\" option",option); + else if (format <= (unsigned int) MaxSupportedTagFormat) + Option.tagFileFormat = format; + else + error (FATAL, "Unsupported value for \"%s\" option", option); +} + +static void printInvocationDescription (void) +{ + printf (INVOCATION, getExecutableName ()); +} + +static void printOptionDescriptions (const optionDescription *const optDesc) +{ + int i; + for (i = 0 ; optDesc [i].description != NULL ; ++i) + { + if (! Option.etags || optDesc [i].usedByEtags) + puts (optDesc [i].description); + } +} + +static void printFeatureList (void) +{ + int i; + + for (i = 0 ; Features [i] != NULL ; ++i) + { + if (i == 0) + printf (" Optional compiled features: "); + printf ("%s+%s", (i>0 ? 
", " : ""), Features [i]); +#ifdef CUSTOM_CONFIGURATION_FILE + if (strcmp (Features [i], "custom-conf") == 0) + printf ("=%s", CUSTOM_CONFIGURATION_FILE); +#endif + } + if (i > 0) + putchar ('\n'); +} + +static void printProgramIdentification (void) +{ + printf ("%s %s, %s %s\n", + PROGRAM_NAME, PROGRAM_VERSION, + PROGRAM_COPYRIGHT, AUTHOR_NAME); + printf (" Compiled: %s, %s\n", __DATE__, __TIME__); + printf (" Addresses: <%s>, %s\n", AUTHOR_EMAIL, PROGRAM_URL); + printFeatureList (); +} + +static void processHelpOption ( + const char *const option __unused__, + const char *const parameter __unused__) +{ + printProgramIdentification (); + putchar ('\n'); + printInvocationDescription (); + putchar ('\n'); + printOptionDescriptions (LongOptionDescription); + exit (0); +} + +static void processLanguageForceOption ( + const char *const option, const char *const parameter) +{ + langType language; + if (strcasecmp (parameter, "auto") == 0) + language = LANG_AUTO; + else + language = getNamedLanguage (parameter); + + if (strcmp (option, "lang") == 0 || strcmp (option, "language") == 0) + error (WARNING, + "\"--%s\" option is obsolete; use \"--language-force\" instead", + option); + if (language == LANG_IGNORE) + error (FATAL, "Unknown language \"%s\" in \"%s\" option", parameter, option); + else + Option.language = language; +} +static char* skipPastMap (char* p) +{ + while (*p != EXTENSION_SEPARATOR && + *p != PATTERN_START && *p != ',' && *p != '\0') + ++p; + return p; +} + +/* Parses the mapping beginning at `map', adds it to the language map, and + * returns first character past the map. 
+ */ +static char* addLanguageMap (const langType language, char* map) +{ + char* p = NULL; + const char first = *map; + if (first == EXTENSION_SEPARATOR) /* extension map */ + { + ++map; + p = skipPastMap (map); + if (*p == '\0') + { + verbose (" .%s", map); + addLanguageExtensionMap (language, map); + p = map + strlen (map); + } + else + { + const char separator = *p; + *p = '\0'; + verbose (" .%s", map); + addLanguageExtensionMap (language, map); + *p = separator; + } + } + else if (first == PATTERN_START) /* pattern map */ + { + ++map; + for (p = map ; *p != PATTERN_STOP && *p != '\0' ; ++p) + { + if (*p == '\\' && *(p + 1) == PATTERN_STOP) + ++p; + } + if (*p == '\0') + error (FATAL, "Unterminated file name pattern for %s language", + getLanguageName (language)); + else + { + *p++ = '\0'; + verbose (" (%s)", map); + addLanguagePatternMap (language, map); + } + } + else + error (FATAL, "Badly formed language map for %s language", + getLanguageName (language)); + return p; +} + +static char* processLanguageMap (char* map) +{ + char* const separator = strchr (map, ':'); + char* result = NULL; + if (separator != NULL) + { + langType language; + char *list = separator + 1; + boolean clear = FALSE; + *separator = '\0'; + language = getNamedLanguage (map); + if (language != LANG_IGNORE) + { + const char *const deflt = "default"; + char* p; + if (*list == '+') + ++list; + else + clear = TRUE; + for (p = list ; *p != ',' && *p != '\0' ; ++p) /*no-op*/ ; + if ((size_t) (p - list) == strlen (deflt) && + strncasecmp (list, deflt, p - list) == 0) + { + verbose (" Restoring default %s language map: ", getLanguageName (language)); + installLanguageMapDefault (language); + list = p; + } + else + { + if (clear) + { + verbose (" Setting %s language map:", getLanguageName (language)); + clearLanguageMap (language); + } + else + verbose (" Adding to %s language map:", getLanguageName (language)); + while (list != NULL && *list != '\0' && *list != ',') + list = addLanguageMap 
(language, list); + verbose ("\n"); + } + if (list != NULL && *list == ',') + ++list; + result = list; + } + } + return result; +} + +static void processLanguageMapOption ( + const char *const option, const char *const parameter) +{ + char *const maps = eStrdup (parameter); + char *map = maps; + + if (strcmp (parameter, "default") == 0) + { + verbose (" Restoring default language maps:\n"); + installLanguageMapDefaults (); + } + else while (map != NULL && *map != '\0') + { + char* const next = processLanguageMap (map); + if (next == NULL) + error (WARNING, "Unknown language \"%s\" in \"%s\" option", parameter, option); + map = next; + } + eFree (maps); +} + +static void processLanguagesOption ( + const char *const option, const char *const parameter) +{ + char *const langs = eStrdup (parameter); + enum { Add, Remove, Replace } mode = Replace; + boolean first = TRUE; + char *lang = langs; + const char* prefix = ""; + verbose (" Enabled languages: "); + while (lang != NULL) + { + char *const end = strchr (lang, ','); + if (lang [0] == '+') + { + ++lang; + mode = Add; + prefix = "+ "; + } + else if (lang [0] == '-') + { + ++lang; + mode = Remove; + prefix = "- "; + } + if (mode == Replace) + enableLanguages (FALSE); + if (end != NULL) + *end = '\0'; + if (lang [0] != '\0') + { + if (strcmp (lang, "all") == 0) + enableLanguages ((boolean) (mode != Remove)); + else + { + const langType language = getNamedLanguage (lang); + if (language == LANG_IGNORE) + error (WARNING, "Unknown language \"%s\" in \"%s\" option", lang, option); + else + enableLanguage (language, (boolean) (mode != Remove)); + } + verbose ("%s%s%s", (first ? "" : ", "), prefix, lang); + prefix = ""; + first = FALSE; + if (mode == Replace) + mode = Add; + } + lang = (end != NULL ? 
end + 1 : NULL);
+	}
+	verbose ("\n");
+	eFree (langs);
+}
+
+/* Handles --license: prints the program identification followed by the two
+ * license texts, then exits with status 0.
+ */
+static void processLicenseOption (
+		const char *const option __unused__,
+		const char *const parameter __unused__)
+{
+	printProgramIdentification ();
+	puts ("");
+	puts (License1);
+	puts (License2);
+	exit (0);
+}
+
+/* Handles --list-kinds: prints the kinds of the named language, or of all
+ * languages when the parameter is empty or "all" (case-insensitive), then
+ * exits with status 0. An unknown language name is a fatal error.
+ */
+static void processListKindsOption (
+		const char *const option, const char *const parameter)
+{
+	if (parameter [0] == '\0' || strcasecmp (parameter, "all") == 0)
+		printLanguageKinds (LANG_AUTO);
+	else
+	{
+		langType language = getNamedLanguage (parameter);
+		if (language == LANG_IGNORE)
+			error (FATAL, "Unknown language \"%s\" in \"%s\" option", parameter, option);
+		else
+			printLanguageKinds (language);
+	}
+	exit (0);
+}
+
+/* Handles --list-maps: prints the file name maps of the named language, or
+ * of all languages when the parameter is empty or "all" (case-insensitive),
+ * then exits with status 0. An unknown language name is a fatal error.
+ *
+ * Both parameters are read by the body, so they are not tagged __unused__;
+ * this keeps the declaration in line with processListKindsOption.
+ */
+static void processListMapsOption (
+		const char *const option, const char *const parameter)
+{
+	if (parameter [0] == '\0' || strcasecmp (parameter, "all") == 0)
+		printLanguageMaps (LANG_AUTO);
+	else
+	{
+		langType language = getNamedLanguage (parameter);
+		if (language == LANG_IGNORE)
+			error (FATAL, "Unknown language \"%s\" in \"%s\" option", parameter, option);
+		else
+			printLanguageMaps (language);
+	}
+	exit (0);
+}
+
+/* Handles --list-languages: prints the language list, then exits with
+ * status 0.
+ */
+static void processListLanguagesOption (
+		const char *const option __unused__,
+		const char *const parameter __unused__)
+{
+	printLanguageList ();
+	exit (0);
+}
+
+static void processOptionFile (
+		const char *const option, const char *const parameter)
+{
+	if (parameter [0] == '\0')
+		error (WARNING, "no option file supplied for \"%s\"", option);
+	else if (! 
parseFileOptions (parameter)) + error (FATAL | PERROR, "cannot open option file \"%s\"", parameter); +} + +static void processSortOption ( + const char *const option, const char *const parameter) +{ + if (isFalse (parameter)) + Option.sorted = SO_UNSORTED; + else if (isTrue (parameter)) + Option.sorted = SO_SORTED; + else if (strcasecmp (parameter, "f") == 0 || + strcasecmp (parameter, "fold") == 0 || + strcasecmp (parameter, "foldcase") == 0) + Option.sorted = SO_FOLDSORTED; + else + error (FATAL, "Invalid value for \"%s\" option", option); +} + +static void installHeaderListDefaults (void) +{ + Option.headerExt = stringListNewFromArgv (HeaderExtensions); + if (Option.verbose) + { + printf (" Setting default header extensions: "); + stringListPrint (Option.headerExt); + putchar ('\n'); + } +} + +static void processHeaderListOption (const int option, const char *parameter) +{ + /* Check to make sure that the user did not enter "ctags -h *.c" + * by testing to see if the list is a filename that exists. + */ + if (doesFileExist (parameter)) + error (FATAL, "-%c: Invalid list", option); + if (strcmp (parameter, "default") == 0) + installHeaderListDefaults (); + else + { + boolean clear = TRUE; + + if (parameter [0] == '+') + { + ++parameter; + clear = FALSE; + } + if (Option.headerExt == NULL) + Option.headerExt = stringListNew (); + verbose (" Header Extensions:\n"); + addExtensionList (Option.headerExt, parameter, clear); + } +} + +/* + * Token ignore processing + */ + +/* Determines whether or not "name" should be ignored, per the ignore list. 
+ */ +extern boolean isIgnoreToken ( + const char *const name, boolean *const pIgnoreParens, + const char **const replacement) +{ + boolean result = FALSE; + + if (Option.ignore != NULL) + { + const size_t nameLen = strlen (name); + unsigned int i; + + if (pIgnoreParens != NULL) + *pIgnoreParens = FALSE; + + for (i = 0 ; i < stringListCount (Option.ignore) ; ++i) + { + vString *token = stringListItem (Option.ignore, i); + + if (strncmp (vStringValue (token), name, nameLen) == 0) + { + const size_t tokenLen = vStringLength (token); + + if (nameLen == tokenLen) + { + result = TRUE; + break; + } + else if (tokenLen == nameLen + 1 && + vStringChar (token, tokenLen - 1) == '+') + { + result = TRUE; + if (pIgnoreParens != NULL) + *pIgnoreParens = TRUE; + break; + } + else if (vStringChar (token, nameLen) == '=') + { + if (replacement != NULL) + *replacement = vStringValue (token) + nameLen + 1; + break; + } + } + } + } + return result; +} + +static void saveIgnoreToken (vString *const ignoreToken) +{ + if (Option.ignore == NULL) + Option.ignore = stringListNew (); + stringListAdd (Option.ignore, ignoreToken); + verbose (" ignore token: %s\n", vStringValue (ignoreToken)); +} + +static void readIgnoreList (const char *const list) +{ + char* newList = stringCopy (list); + const char *token = strtok (newList, IGNORE_SEPARATORS); + + while (token != NULL) + { + vString *const entry = vStringNewInit (token); + + saveIgnoreToken (entry); + token = strtok (NULL, IGNORE_SEPARATORS); + } + eFree (newList); +} + +static void addIgnoreListFromFile (const char *const fileName) +{ + stringList* tokens = stringListNewFromFile (fileName); + if (tokens == NULL) + error (FATAL | PERROR, "cannot open \"%s\"", fileName); + if (Option.ignore == NULL) + Option.ignore = tokens; + else + stringListCombine (Option.ignore, tokens); +} + +static void processIgnoreOption (const char *const list) +{ + if (strchr ("@./\\", list [0]) != NULL) + { + const char* fileName = (*list == '@') ? 
list + 1 : list; + addIgnoreListFromFile (fileName); + } +#if defined (MSDOS) || defined (WIN32) || defined (OS2) + else if (isalpha (list [0]) && list [1] == ':') + addIgnoreListFromFile (list); +#endif + else if (strcmp (list, "-") == 0) + { + freeList (&Option.ignore); + verbose (" clearing list\n"); + } + else + readIgnoreList (list); +} + +static void processVersionOption ( + const char *const option __unused__, + const char *const parameter __unused__) +{ + printProgramIdentification (); + exit (0); +} + +/* + * Option tables + */ + +static parametricOption ParametricOptions [] = { + { "etags-include", processEtagsInclude, FALSE }, + { "exclude", processExcludeOption, FALSE }, + { "excmd", processExcmdOption, FALSE }, + { "extra", processExtraTagsOption, FALSE }, + { "fields", processFieldsOption, FALSE }, + { "filter-terminator", processFilterTerminatorOption, TRUE }, + { "format", processFormatOption, TRUE }, + { "help", processHelpOption, TRUE }, + { "lang", processLanguageForceOption, FALSE }, + { "language", processLanguageForceOption, FALSE }, + { "language-force", processLanguageForceOption, FALSE }, + { "languages", processLanguagesOption, FALSE }, + { "langdef", processLanguageDefineOption, FALSE }, + { "langmap", processLanguageMapOption, FALSE }, + { "license", processLicenseOption, TRUE }, + { "list-kinds", processListKindsOption, TRUE }, + { "list-maps", processListMapsOption, TRUE }, + { "list-languages", processListLanguagesOption, TRUE }, + { "options", processOptionFile, FALSE }, + { "sort", processSortOption, TRUE }, + { "version", processVersionOption, TRUE }, +}; + +static booleanOption BooleanOptions [] = { + { "append", &Option.append, TRUE }, + { "file-scope", &Option.include.fileScope, FALSE }, + { "file-tags", &Option.include.fileNames, FALSE }, + { "filter", &Option.filter, TRUE }, + { "if0", &Option.if0, FALSE }, + { "kind-long", &Option.kindLong, TRUE }, + { "line-directives",&Option.lineDirectives, FALSE }, + { "links", 
&Option.followLinks, FALSE }, +#ifdef RECURSE_SUPPORTED + { "recurse", &Option.recurse, FALSE }, +#endif + { "tag-relative", &Option.tagRelative, TRUE }, + { "totals", &Option.printTotals, TRUE }, + { "verbose", &Option.verbose, FALSE }, +}; + +/* + * Generic option parsing + */ + +static void checkOptionOrder (const char* const option) +{ + if (NonOptionEncountered) + error (FATAL, "-%s option may not follow a file name", option); +} + +static boolean processParametricOption ( + const char *const option, const char *const parameter) +{ + const int count = sizeof (ParametricOptions) / sizeof (parametricOption); + boolean found = FALSE; + int i; + + for (i = 0 ; i < count && ! found ; ++i) + { + parametricOption* const entry = &ParametricOptions [i]; + if (strcmp (option, entry->name) == 0) + { + found = TRUE; + if (entry->initOnly) + checkOptionOrder (option); + (entry->handler) (option, parameter); + } + } + return found; +} + +static boolean getBooleanOption ( + const char *const option, const char *const parameter) +{ + boolean selection = TRUE; + + if (parameter [0] == '\0') + selection = TRUE; + else if (isFalse (parameter)) + selection = FALSE; + else if (isTrue (parameter)) + selection = TRUE; + else + error (FATAL, "Invalid value for \"%s\" option", option); + + return selection; +} + +static boolean processBooleanOption ( + const char *const option, const char *const parameter) +{ + const int count = sizeof (BooleanOptions) / sizeof (booleanOption); + boolean found = FALSE; + int i; + + for (i = 0 ; i < count && ! 
found ; ++i) + { + booleanOption* const entry = &BooleanOptions [i]; + if (strcmp (option, entry->name) == 0) + { + found = TRUE; + if (entry->initOnly) + checkOptionOrder (option); + *entry->pValue = getBooleanOption (option, parameter); + } + } + return found; +} + +static void processLongOption ( + const char *const option, const char *const parameter) +{ + Assert (parameter != NULL); + if (parameter == NULL && parameter [0] == '\0') + verbose (" Option: --%s\n", option); + else + verbose (" Option: --%s=%s\n", option, parameter); + + if (processBooleanOption (option, parameter)) + ; + else if (processParametricOption (option, parameter)) + ; + else if (processKindOption (option, parameter)) + ; + else if (processRegexOption (option, parameter)) + ; +#ifndef RECURSE_SUPPORTED + else if (strcmp (option, "recurse") == 0) + error (WARNING, "%s option not supported on this host", option); +#endif + else + error (FATAL, "Unknown option: --%s", option); +} + +static void processShortOption ( + const char *const option, const char *const parameter) +{ + if (parameter == NULL || parameter [0] == '\0') + verbose (" Option: -%s\n", option); + else + verbose (" Option: -%s %s\n", option, parameter); + + if (isCompoundOption (*option) && (parameter == NULL || parameter [0] == '\0')) + error (FATAL, "Missing parameter for \"%s\" option", option); + else switch (*option) + { + case '?': + processHelpOption ("?", NULL); + exit (0); + break; + case 'a': + checkOptionOrder (option); + Option.append = TRUE; + break; +#ifdef DEBUG + case 'b': + if (atol (parameter) < 0) + error (FATAL, "-%s: Invalid line number", option); + Option.breakLine = atol (parameter); + break; + case 'D': + Option.debugLevel = strtol (parameter, NULL, 0); + if (debug (DEBUG_STATUS)) + Option.verbose = TRUE; + break; +#endif + case 'B': + Option.backward = TRUE; + break; + case 'e': + checkOptionOrder (option); + setEtagsMode (); + break; + case 'f': + case 'o': + checkOptionOrder (option); + if 
(Option.tagFileName != NULL) + { + error (WARNING, + "-%s option specified more than once, last value used", + option); + freeString (&Option.tagFileName); + } + else if (parameter [0] == '-' && parameter [1] != '\0') + error (FATAL, "output file name may not begin with a '-'"); + Option.tagFileName = stringCopy (parameter); + break; + case 'F': + Option.backward = FALSE; + break; + case 'h': + processHeaderListOption (*option, parameter); + break; + case 'I': + processIgnoreOption (parameter); + break; + case 'L': + if (Option.fileList != NULL) + { + error (WARNING, + "-%s option specified more than once, last value used", + option); + freeString (&Option.fileList); + } + Option.fileList = stringCopy (parameter); + break; + case 'n': + Option.locate = EX_LINENUM; + break; + case 'N': + Option.locate = EX_PATTERN; + break; + case 'R': +#ifdef RECURSE_SUPPORTED + Option.recurse = TRUE; +#else + error (WARNING, "-%s option not supported on this host", option); +#endif + break; + case 'u': + checkOptionOrder (option); + Option.sorted = SO_UNSORTED; + break; + case 'V': + Option.verbose = TRUE; + break; + case 'w': + /* silently ignored */ + break; + case 'x': + checkOptionOrder (option); + Option.xref = TRUE; + break; + default: + error (FATAL, "Unknown option: -%s", option); + break; + } +} + +extern void parseOption (cookedArgs* const args) +{ + Assert (! cArgOff (args)); + if (args->isOption) + { + if (args->longOption) + processLongOption (args->item, args->parameter); + else + { + const char *parameter = args->parameter; + while (*parameter == ' ') + ++parameter; + processShortOption (args->item, parameter); + } + cArgForth (args); + } +} + +extern void parseOptions (cookedArgs* const args) +{ + NonOptionEncountered = FALSE; + while (! cArgOff (args) && cArgIsOption (args)) + parseOption (args); + if (! cArgOff (args) && ! 
cArgIsOption (args)) + NonOptionEncountered = TRUE; +} + +static const char *CheckFile; +static boolean checkSameFile (const char *const fileName) +{ + return isSameFile (CheckFile, fileName); +} + +static boolean parseFileOptions (const char* const fileName) +{ + boolean fileFound = FALSE; + const char* const format = "Considering option file %s: %s\n"; + CheckFile = fileName; + if (stringListHasTest (OptionFiles, checkSameFile)) + verbose (format, fileName, "already considered"); + else + { + FILE* const fp = fopen (fileName, "r"); + if (fp == NULL) + verbose (format, fileName, "not found"); + else + { + cookedArgs* const args = cArgNewFromLineFile (fp); + vString* file = vStringNewInit (fileName); + stringListAdd (OptionFiles, file); + verbose (format, fileName, "reading..."); + parseOptions (args); + if (NonOptionEncountered) + error (WARNING, "Ignoring non-option in %s\n", fileName); + cArgDelete (args); + fclose (fp); + fileFound = TRUE; + } + } + return fileFound; +} + +/* Actions to be taken before reading any other options */ +extern void previewFirstOption (cookedArgs* const args) +{ + while (cArgIsOption (args)) + { + if (strcmp (args->item, "V") == 0 || strcmp (args->item, "verbose") == 0) + parseOption (args); + else if (strcmp (args->item, "options") == 0 && + strcmp (args->parameter, "NONE") == 0) + { + fprintf (stderr, "No options will be read from files or environment\n"); + SkipConfiguration = TRUE; + cArgForth (args); + } + else + break; + } +} + +static void parseConfigurationFileOptionsInDirectoryWithLeafname (const char* directory, const char* leafname) +{ + vString* const pathname = combinePathAndFile (directory, leafname); + parseFileOptions (vStringValue (pathname)); + vStringDelete (pathname); +} + +static void parseConfigurationFileOptionsInDirectory (const char* directory) +{ + parseConfigurationFileOptionsInDirectoryWithLeafname (directory, ".ctags"); +#ifdef MSDOS_STYLE_PATH + parseConfigurationFileOptionsInDirectoryWithLeafname 
(directory, "ctags.cnf");
+#endif
+}
+
+/* Reads the option files in a fixed order: the compile-time custom file (if
+ * configured), the system-wide files, the per-user files in the home
+ * directory, and finally the files in the current directory.
+ */
+static void parseConfigurationFileOptions (void)
+{
+	/* We parse .ctags on all systems, and additionally ctags.cnf on DOS. */
+	const char* const home = getenv ("HOME");
+#ifdef CUSTOM_CONFIGURATION_FILE
+	parseFileOptions (CUSTOM_CONFIGURATION_FILE);
+#endif
+#ifdef MSDOS_STYLE_PATH
+	parseFileOptions ("/ctags.cnf");
+#endif
+	parseFileOptions ("/etc/ctags.conf");
+	parseFileOptions ("/usr/local/etc/ctags.conf");
+	if (home != NULL)
+	{
+		parseConfigurationFileOptionsInDirectory (home);
+	}
+	else
+	{
+#ifdef MSDOS_STYLE_PATH
+		/*
+		 * Windows users don't usually set HOME.
+		 * The OS sets HOMEDRIVE and HOMEPATH for them.
+		 */
+		const char* homeDrive = getenv ("HOMEDRIVE");
+		const char* homePath = getenv ("HOMEPATH");
+		if (homeDrive != NULL && homePath != NULL)
+		{
+			vString* const windowsHome = vStringNew ();
+			vStringCatS (windowsHome, homeDrive);
+			vStringCatS (windowsHome, homePath);
+			parseConfigurationFileOptionsInDirectory (vStringValue (windowsHome));
+			vStringDelete (windowsHome);
+		}
+#endif
+	}
+	parseConfigurationFileOptionsInDirectory (".");
+}
+
+/* Reads options from the environment. In etags mode the variable named by
+ * ETAGS_ENVIRONMENT is tried first; if it is not consulted or is unset, the
+ * variable named by CTAGS_ENVIRONMENT is used. An unset or empty value is
+ * ignored. Non-option words in the value only produce a warning.
+ */
+static void parseEnvironmentOptions (void)
+{
+	const char *envOptions = NULL;
+	const char* var = NULL;
+
+	if (Option.etags)
+	{
+		var = ETAGS_ENVIRONMENT;
+		envOptions = getenv (var);
+	}
+	if (envOptions == NULL)
+	{
+		var = CTAGS_ENVIRONMENT;
+		envOptions = getenv (var);
+	}
+	if (envOptions != NULL && envOptions [0] != '\0')
+	{
+		cookedArgs* const args = cArgNewFromString (envOptions);
+		/* Name the variable actually read rather than always "$CTAGS". */
+		verbose ("Reading options from $%s\n", var);
+		parseOptions (args);
+		cArgDelete (args);
+		if (NonOptionEncountered)
+			error (WARNING, "Ignoring non-option in %s variable", var);
+	}
+}
+
+extern void readOptionConfiguration (void)
+{
+	if (! 
SkipConfiguration) + { + parseConfigurationFileOptions (); + parseEnvironmentOptions (); + } +} + +/* +* Option initialization +*/ + +extern void initOptions (void) +{ + OptionFiles = stringListNew (); + verbose ("Setting option defaults\n"); + installHeaderListDefaults (); + verbose (" Installing default language mappings:\n"); + installLanguageMapDefaults (); + + /* always excluded by default */ + verbose (" Installing default exclude patterns:\n"); + processExcludeOption (NULL, "{arch}"); + processExcludeOption (NULL, ".arch-ids"); + processExcludeOption (NULL, ".arch-inventory"); + processExcludeOption (NULL, "autom4te.cache"); + processExcludeOption (NULL, "BitKeeper"); + processExcludeOption (NULL, ".bzr"); + processExcludeOption (NULL, ".bzrignore"); + processExcludeOption (NULL, "CVS"); + processExcludeOption (NULL, ".cvsignore"); + processExcludeOption (NULL, "_darcs"); + processExcludeOption (NULL, ".deps"); + processExcludeOption (NULL, "EIFGEN"); + processExcludeOption (NULL, ".git"); + processExcludeOption (NULL, ".hg"); + processExcludeOption (NULL, "PENDING"); + processExcludeOption (NULL, "RCS"); + processExcludeOption (NULL, "RESYNC"); + processExcludeOption (NULL, "SCCS"); + processExcludeOption (NULL, ".svn"); +} + +extern void freeOptionResources (void) +{ + freeString (&Option.tagFileName); + freeString (&Option.fileList); + freeString (&Option.filterTerminator); + + freeList (&Excluded); + freeList (&Option.ignore); + freeList (&Option.headerExt); + freeList (&Option.etagsInclude); + freeList (&OptionFiles); +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/options.h b/options.h new file mode 100644 index 0000000..34150e7 --- /dev/null +++ b/options.h @@ -0,0 +1,154 @@ +/* +* $Id: options.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1998-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Defines external interface to option processing. 
+*/ +#ifndef _OPTIONS_H +#define _OPTIONS_H + +#if defined(OPTION_WRITE) || defined(VAXC) +# define CONST_OPTION +#else +# define CONST_OPTION const +#endif + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "args.h" +#include "parse.h" +#include "strlist.h" +#include "vstring.h" + +/* +* DATA DECLARATIONS +*/ + +typedef enum { OPTION_NONE, OPTION_SHORT, OPTION_LONG } optionType; + +typedef struct sCookedArgs { + /* private */ + Arguments* args; + char *shortOptions; + char simple[2]; + boolean isOption; + boolean longOption; + const char* parameter; + /* public */ + char* item; +} cookedArgs; + +typedef enum eLocate { + EX_MIX, /* line numbers for defines, patterns otherwise */ + EX_LINENUM, /* -n only line numbers in tag file */ + EX_PATTERN /* -N only patterns in tag file */ +} exCmd; + +typedef enum sortType { + SO_UNSORTED, + SO_SORTED, + SO_FOLDSORTED +} sortType; + +struct sInclude { + boolean fileNames; /* include tags for source file names */ + boolean qualifiedTags; /* include tags for qualified class members */ + boolean fileScope; /* include tags of file scope only */ +}; + +struct sExtFields { /* extension field content control */ + boolean access; + boolean fileScope; + boolean implementation; + boolean inheritance; + boolean kind; + boolean kindKey; + boolean kindLong; + boolean language; + boolean lineNumber; + boolean scope; + boolean signature; + boolean typeRef; +}; + +/* This stores the command line options. 
+ */ +typedef struct sOptionValues { + struct sInclude include;/* --extra extra tag inclusion */ + struct sExtFields extensionFields;/* --fields extension field control */ + stringList* ignore; /* -I name of file containing tokens to ignore */ + boolean append; /* -a append to "tags" file */ + boolean backward; /* -B regexp patterns search backwards */ + boolean etags; /* -e output Emacs style tags file */ + exCmd locate; /* --excmd EX command used to locate tag */ + boolean recurse; /* -R recurse into directories */ + sortType sorted; /* -u,--sort sort tags */ + boolean verbose; /* -V verbose */ + boolean xref; /* -x generate xref output instead */ + char *fileList; /* -L name of file containing names of files */ + char *tagFileName; /* -o name of tags file */ + stringList* headerExt; /* -h header extensions */ + stringList* etagsInclude;/* --etags-include list of TAGS files to include*/ + unsigned int tagFileFormat;/* --format tag file format (level) */ + boolean if0; /* --if0 examine code within "#if 0" branch */ + boolean kindLong; /* --kind-long */ + langType language; /* --lang specified language override */ + boolean followLinks; /* --link follow symbolic links? */ + boolean filter; /* --filter behave as filter: files in, tags out */ + char* filterTerminator; /* --filter-terminator string to output */ + boolean tagRelative; /* --tag-relative file paths relative to tag file */ + boolean printTotals; /* --totals print cumulative statistics */ + boolean lineDirectives; /* --linedirectives process #line directives */ +#ifdef DEBUG + long debugLevel; /* -D debugging output */ + unsigned long breakLine;/* -b source line at which to call lineBreak() */ +#endif +} optionValues; + +/* +* GLOBAL VARIABLES +*/ +extern CONST_OPTION optionValues Option; + +/* +* FUNCTION PROTOTYPES +*/ +extern void verbose (const char *const format, ...) 
__printf__ (1, 2); +extern void freeList (stringList** const pString); +extern void setDefaultTagFileName (void); +extern void checkOptions (void); +extern boolean filesRequired (void); +extern void testEtagsInvocation (void); + +extern cookedArgs* cArgNewFromString (const char* string); +extern cookedArgs* cArgNewFromArgv (char* const* const argv); +extern cookedArgs* cArgNewFromFile (FILE* const fp); +extern cookedArgs* cArgNewFromLineFile (FILE* const fp); +extern void cArgDelete (cookedArgs* const current); +extern boolean cArgOff (cookedArgs* const current); +extern boolean cArgIsOption (cookedArgs* const current); +extern const char* cArgItem (cookedArgs* const current); +extern void cArgForth (cookedArgs* const current); + +extern boolean isExcludedFile (const char* const name); +extern boolean isIncludeFile (const char *const fileName); +extern boolean isIgnoreToken (const char *const name, boolean *const pIgnoreParens, const char **const replacement); +extern void parseOption (cookedArgs* const cargs); +extern void parseOptions (cookedArgs* const cargs); +extern void previewFirstOption (cookedArgs* const cargs); +extern void readOptionConfiguration (void); +extern void initOptions (void); +extern void freeOptionResources (void); + +#endif /* _OPTIONS_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/parse.c b/parse.c new file mode 100644 index 0000000..0b5e2c3 --- /dev/null +++ b/parse.c @@ -0,0 +1,677 @@ +/* +* $Id: parse.c 597 2007-07-31 05:35:30Z dhiebert $ +* +* Copyright (c) 1996-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for managing source languages and +* dispatching files to the appropriate language parser. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "debug.h" +#include "entry.h" +#include "main.h" +#define OPTION_WRITE +#include "options.h" +#include "parsers.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* +* DATA DEFINITIONS +*/ +static parserDefinitionFunc* BuiltInParsers[] = { PARSER_LIST }; +static parserDefinition** LanguageTable = NULL; +static unsigned int LanguageCount = 0; + +/* +* FUNCTION DEFINITIONS +*/ + +extern void makeSimpleTag ( + const vString* const name, kindOption* const kinds, const int kind) +{ + if (kinds [kind].enabled && name != NULL && vStringLength (name) > 0) + { + tagEntryInfo e; + initTagEntry (&e, vStringValue (name)); + + e.kindName = kinds [kind].name; + e.kind = kinds [kind].letter; + + makeTagEntry (&e); + } +} + +/* +* parserDescription mapping management +*/ + +extern parserDefinition* parserNew (const char* name) +{ + parserDefinition* result = xCalloc (1, parserDefinition); + result->name = eStrdup (name); + return result; +} + +extern const char *getLanguageName (const langType language) +{ + const char* result; + if (language == LANG_IGNORE) + result = "unknown"; + else + { + Assert (0 <= language && language < (int) LanguageCount); + result = LanguageTable [language]->name; + } + return result; +} + +extern langType getNamedLanguage (const char *const name) +{ + langType result = LANG_IGNORE; + unsigned int i; + Assert (name != NULL); + for (i = 0 ; i < LanguageCount && result == LANG_IGNORE ; ++i) + { + const parserDefinition* const lang = LanguageTable [i]; + if (lang->name != NULL) + if (strcasecmp (name, lang->name) == 0) + result = i; + } + return result; +} + +static langType getExtensionLanguage (const char *const extension) +{ + langType result = LANG_IGNORE; + unsigned int i; + for (i = 0 ; i < LanguageCount && result == LANG_IGNORE ; ++i) + { + stringList* const exts = LanguageTable [i]->currentExtensions; + if (exts != NULL && 
stringListExtensionMatched (exts, extension)) + result = i; + } + return result; +} + +static langType getPatternLanguage (const char *const fileName) +{ + langType result = LANG_IGNORE; + const char* base = baseFilename (fileName); + unsigned int i; + for (i = 0 ; i < LanguageCount && result == LANG_IGNORE ; ++i) + { + stringList* const ptrns = LanguageTable [i]->currentPatterns; + if (ptrns != NULL && stringListFileMatched (ptrns, base)) + result = i; + } + return result; +} + +#ifdef SYS_INTERPRETER + +/* The name of the language interpreter, either directly or as the argument + * to "env". + */ +static vString* determineInterpreter (const char* const cmd) +{ + vString* const interpreter = vStringNew (); + const char* p = cmd; + do + { + vStringClear (interpreter); + for ( ; isspace ((int) *p) ; ++p) + ; /* no-op */ + for ( ; *p != '\0' && ! isspace ((int) *p) ; ++p) + vStringPut (interpreter, (int) *p); + vStringTerminate (interpreter); + } while (strcmp (vStringValue (interpreter), "env") == 0); + return interpreter; +} + +static langType getInterpreterLanguage (const char *const fileName) +{ + langType result = LANG_IGNORE; + FILE* const fp = fopen (fileName, "r"); + if (fp != NULL) + { + vString* const vLine = vStringNew (); + const char* const line = readLine (vLine, fp); + if (line != NULL && line [0] == '#' && line [1] == '!') + { + const char* const lastSlash = strrchr (line, '/'); + const char *const cmd = lastSlash != NULL ? 
lastSlash+1 : line+2; + vString* const interpreter = determineInterpreter (cmd); + result = getExtensionLanguage (vStringValue (interpreter)); + if (result == LANG_IGNORE) + result = getNamedLanguage (vStringValue (interpreter)); + vStringDelete (interpreter); + } + vStringDelete (vLine); + fclose (fp); + } + return result; +} + +#endif + +extern langType getFileLanguage (const char *const fileName) +{ + langType language = Option.language; + if (language == LANG_AUTO) + { + language = getExtensionLanguage (fileExtension (fileName)); + if (language == LANG_IGNORE) + language = getPatternLanguage (fileName); +#ifdef SYS_INTERPRETER + if (language == LANG_IGNORE) + { + fileStatus *status = eStat (fileName); + if (status->isExecutable) + language = getInterpreterLanguage (fileName); + } +#endif + } + return language; +} + +extern void printLanguageMap (const langType language) +{ + boolean first = TRUE; + unsigned int i; + stringList* map = LanguageTable [language]->currentPatterns; + Assert (0 <= language && language < (int) LanguageCount); + for (i = 0 ; map != NULL && i < stringListCount (map) ; ++i) + { + printf ("%s(%s)", (first ? "" : " "), + vStringValue (stringListItem (map, i))); + first = FALSE; + } + map = LanguageTable [language]->currentExtensions; + for (i = 0 ; map != NULL && i < stringListCount (map) ; ++i) + { + printf ("%s.%s", (first ? 
"" : " "), + vStringValue (stringListItem (map, i))); + first = FALSE; + } +} + +extern void installLanguageMapDefault (const langType language) +{ + parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + if (lang->currentPatterns != NULL) + stringListDelete (lang->currentPatterns); + if (lang->currentExtensions != NULL) + stringListDelete (lang->currentExtensions); + + if (lang->patterns == NULL) + lang->currentPatterns = stringListNew (); + else + { + lang->currentPatterns = + stringListNewFromArgv (lang->patterns); + } + if (lang->extensions == NULL) + lang->currentExtensions = stringListNew (); + else + { + lang->currentExtensions = + stringListNewFromArgv (lang->extensions); + } + if (Option.verbose) + printLanguageMap (language); + verbose ("\n"); +} + +extern void installLanguageMapDefaults (void) +{ + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + { + verbose (" %s: ", getLanguageName (i)); + installLanguageMapDefault (i); + } +} + +extern void clearLanguageMap (const langType language) +{ + Assert (0 <= language && language < (int) LanguageCount); + stringListClear (LanguageTable [language]->currentPatterns); + stringListClear (LanguageTable [language]->currentExtensions); +} + +extern void addLanguagePatternMap (const langType language, const char* ptrn) +{ + vString* const str = vStringNewInit (ptrn); + parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + if (lang->currentPatterns == NULL) + lang->currentPatterns = stringListNew (); + stringListAdd (lang->currentPatterns, str); +} + +extern boolean removeLanguageExtensionMap (const char *const extension) +{ + boolean result = FALSE; + unsigned int i; + for (i = 0 ; i < LanguageCount && ! 
result ; ++i) + { + stringList* const exts = LanguageTable [i]->currentExtensions; + if (exts != NULL && stringListRemoveExtension (exts, extension)) + { + verbose (" (removed from %s)", getLanguageName (i)); + result = TRUE; + } + } + return result; +} + +extern void addLanguageExtensionMap ( + const langType language, const char* extension) +{ + vString* const str = vStringNewInit (extension); + Assert (0 <= language && language < (int) LanguageCount); + removeLanguageExtensionMap (extension); + stringListAdd (LanguageTable [language]->currentExtensions, str); +} + +extern void enableLanguage (const langType language, const boolean state) +{ + Assert (0 <= language && language < (int) LanguageCount); + LanguageTable [language]->enabled = state; +} + +extern void enableLanguages (const boolean state) +{ + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + enableLanguage (i, state); +} + +static void initializeParsers (void) +{ + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + if (LanguageTable [i]->initialize != NULL) + (LanguageTable [i]->initialize) ((langType) i); +} + +extern void initializeParsing (void) +{ + unsigned int builtInCount; + unsigned int i; + + builtInCount = sizeof (BuiltInParsers) / sizeof (BuiltInParsers [0]); + LanguageTable = xMalloc (builtInCount, parserDefinition*); + + verbose ("Installing parsers: "); + for (i = 0 ; i < builtInCount ; ++i) + { + parserDefinition* const def = (*BuiltInParsers [i]) (); + if (def != NULL) + { + boolean accepted = FALSE; + if (def->name == NULL || def->name[0] == '\0') + error (FATAL, "parser definition must contain name\n"); + else if (def->regex) + { +#ifdef HAVE_REGEX + def->parser = findRegexTags; + accepted = TRUE; +#endif + } + else if ((def->parser == NULL) == (def->parser2 == NULL)) + error (FATAL, + "%s parser definition must define one and only one parsing routine\n", + def->name); + else + accepted = TRUE; + if (accepted) + { + verbose ("%s%s", i > 0 ? 
", " : "", def->name); + def->id = LanguageCount++; + LanguageTable [def->id] = def; + } + } + } + verbose ("\n"); + enableLanguages (TRUE); + initializeParsers (); +} + +extern void freeParserResources (void) +{ + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + { + parserDefinition* const lang = LanguageTable [i]; + freeList (&lang->currentPatterns); + freeList (&lang->currentExtensions); + eFree (lang->name); + lang->name = NULL; + eFree (lang); + } + if (LanguageTable != NULL) + eFree (LanguageTable); + LanguageTable = NULL; + LanguageCount = 0; +} + +/* +* Option parsing +*/ + +extern void processLanguageDefineOption ( + const char *const option, const char *const parameter __unused__) +{ +#ifdef HAVE_REGEX + if (parameter [0] == '\0') + error (WARNING, "No language specified for \"%s\" option", option); + else if (getNamedLanguage (parameter) != LANG_IGNORE) + error (WARNING, "Language \"%s\" already defined", parameter); + else + { + unsigned int i = LanguageCount++; + parserDefinition* const def = parserNew (parameter); + def->parser = findRegexTags; + def->currentPatterns = stringListNew (); + def->currentExtensions = stringListNew (); + def->regex = TRUE; + def->enabled = TRUE; + def->id = i; + LanguageTable = xRealloc (LanguageTable, i + 1, parserDefinition*); + LanguageTable [i] = def; + } +#else + error (WARNING, "regex support not available; required for --%s option", + option); +#endif +} + +static kindOption *langKindOption (const langType language, const int flag) +{ + unsigned int i; + kindOption* result = NULL; + const parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + for (i=0 ; i < lang->kindCount && result == NULL ; ++i) + if (lang->kinds [i].letter == flag) + result = &lang->kinds [i]; + return result; +} + +static void disableLanguageKinds (const langType language) +{ + const parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + 
lang = LanguageTable [language]; + if (lang->regex) + disableRegexKinds (language); + else + { + unsigned int i; + for (i = 0 ; i < lang->kindCount ; ++i) + lang->kinds [i].enabled = FALSE; + } +} + +static boolean enableLanguageKind ( + const langType language, const int kind, const boolean mode) +{ + boolean result = FALSE; + if (LanguageTable [language]->regex) + result = enableRegexKind (language, kind, mode); + else + { + kindOption* const opt = langKindOption (language, kind); + if (opt != NULL) + { + opt->enabled = mode; + result = TRUE; + } + } + return result; +} + +static void processLangKindOption ( + const langType language, const char *const option, + const char *const parameter) +{ + const char *p = parameter; + boolean mode = TRUE; + int c; + + Assert (0 <= language && language < (int) LanguageCount); + if (*p != '+' && *p != '-') + disableLanguageKinds (language); + while ((c = *p++) != '\0') switch (c) + { + case '+': mode = TRUE; break; + case '-': mode = FALSE; break; + default: + if (! enableLanguageKind (language, c, mode)) + error (WARNING, "Unsupported parameter '%c' for --%s option", + c, option); + break; + } +} + +extern boolean processKindOption ( + const char *const option, const char *const parameter) +{ + boolean handled = FALSE; + const char* const dash = strchr (option, '-'); + if (dash != NULL && + (strcmp (dash + 1, "kinds") == 0 || strcmp (dash + 1, "types") == 0)) + { + langType language; + vString* langName = vStringNew (); + vStringNCopyS (langName, option, dash - option); + language = getNamedLanguage (vStringValue (langName)); + if (language == LANG_IGNORE) + error (WARNING, "Unknown language \"%s\" in \"%s\" option", vStringValue (langName), option); + else + processLangKindOption (language, option, parameter); + vStringDelete (langName); + handled = TRUE; + } + return handled; +} + +static void printLanguageKind (const kindOption* const kind, boolean indent) +{ + const char *const indentation = indent ? 
" " : ""; + printf ("%s%c %s%s\n", indentation, kind->letter, + kind->description != NULL ? kind->description : + (kind->name != NULL ? kind->name : ""), + kind->enabled ? "" : " [off]"); +} + +static void printKinds (langType language, boolean indent) +{ + const parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + if (lang->kinds != NULL || lang->regex) + { + unsigned int i; + for (i = 0 ; i < lang->kindCount ; ++i) + printLanguageKind (lang->kinds + i, indent); + printRegexKinds (language, indent); + } +} + +extern void printLanguageKinds (const langType language) +{ + if (language == LANG_AUTO) + { + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + { + const parserDefinition* const lang = LanguageTable [i]; + printf ("%s%s\n", lang->name, lang->enabled ? "" : " [disabled]"); + printKinds (i, TRUE); + } + } + else + printKinds (language, FALSE); +} + +static void printMaps (const langType language) +{ + const parserDefinition* lang; + unsigned int i; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + printf ("%-8s", lang->name); + if (lang->currentExtensions != NULL) + for (i = 0 ; i < stringListCount (lang->currentExtensions) ; ++i) + printf (" *.%s", vStringValue ( + stringListItem (lang->currentExtensions, i))); + if (lang->currentPatterns != NULL) + for (i = 0 ; i < stringListCount (lang->currentPatterns) ; ++i) + printf (" %s", vStringValue ( + stringListItem (lang->currentPatterns, i))); + putchar ('\n'); +} + +extern void printLanguageMaps (const langType language) +{ + if (language == LANG_AUTO) + { + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + printMaps (i); + } + else + printMaps (language); +} + +static void printLanguage (const langType language) +{ + const parserDefinition* lang; + Assert (0 <= language && language < (int) LanguageCount); + lang = LanguageTable [language]; + if (lang->kinds != NULL || lang->regex) + 
printf ("%s%s\n", lang->name, lang->enabled ? "" : " [disabled]"); +} + +extern void printLanguageList (void) +{ + unsigned int i; + for (i = 0 ; i < LanguageCount ; ++i) + printLanguage (i); +} + +/* +* File parsing +*/ + +static void makeFileTag (const char *const fileName) +{ + if (Option.include.fileNames) + { + tagEntryInfo tag; + initTagEntry (&tag, baseFilename (fileName)); + + tag.isFileEntry = TRUE; + tag.lineNumberEntry = TRUE; + tag.lineNumber = 1; + tag.kindName = "file"; + tag.kind = 'F'; + + makeTagEntry (&tag); + } +} + +static boolean createTagsForFile ( + const char *const fileName, const langType language, + const unsigned int passCount) +{ + boolean retried = FALSE; + Assert (0 <= language && language < (int) LanguageCount); + if (fileOpen (fileName, language)) + { + const parserDefinition* const lang = LanguageTable [language]; + if (Option.etags) + beginEtagsFile (); + + makeFileTag (fileName); + + if (lang->parser != NULL) + lang->parser (); + else if (lang->parser2 != NULL) + retried = lang->parser2 (passCount); + + if (Option.etags) + endEtagsFile (getSourceFileTagPath ()); + + fileClose (); + } + + return retried; +} + +static boolean createTagsWithFallback ( + const char *const fileName, const langType language) +{ + const unsigned long numTags = TagFile.numTags.added; + fpos_t tagFilePosition; + unsigned int passCount = 0; + boolean tagFileResized = FALSE; + + fgetpos (TagFile.fp, &tagFilePosition); + while (createTagsForFile (fileName, language, ++passCount)) + { + /* Restore prior state of tag file. 
+ */ + fsetpos (TagFile.fp, &tagFilePosition); + TagFile.numTags.added = numTags; + tagFileResized = TRUE; + } + return tagFileResized; +} + +extern boolean parseFile (const char *const fileName) +{ + boolean tagFileResized = FALSE; + langType language = Option.language; + if (Option.language == LANG_AUTO) + language = getFileLanguage (fileName); + Assert (language != LANG_AUTO); + if (language == LANG_IGNORE) + verbose ("ignoring %s (unknown language)\n", fileName); + else if (! LanguageTable [language]->enabled) + verbose ("ignoring %s (language disabled)\n", fileName); + else + { + if (Option.filter) + openTagFile (); + + tagFileResized = createTagsWithFallback (fileName, language); + + if (Option.filter) + closeTagFile (tagFileResized); + addTotals (1, 0L, 0L); + + return tagFileResized; + } + return tagFileResized; +} + +/* vi:set tabstop=4 shiftwidth=4 nowrap: */ diff --git a/parse.h b/parse.h new file mode 100644 index 0000000..1dbff35 --- /dev/null +++ b/parse.h @@ -0,0 +1,129 @@ +/* +* $Id: parse.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1998-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Private definitions for parsing support. +*/ +#ifndef _PARSE_H +#define _PARSE_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ +#include "parsers.h" /* contains list of parsers */ +#include "strlist.h" + +/* +* MACROS +*/ +#define KIND_COUNT(kindTable) (sizeof(kindTable)/sizeof(kindOption)) + +#define LANG_AUTO (-1) +#define LANG_IGNORE (-2) + +/* +* DATA DECLARATIONS +*/ +typedef int langType; + +typedef void (*createRegexTag) (const vString* const name); +typedef void (*simpleParser) (void); +typedef boolean (*rescanParser) (const unsigned int passCount); +typedef void (*parserInitialize) (langType language); + +typedef struct sKindOption { + boolean enabled; /* are tags for kind enabled? 
*/ + int letter; /* kind letter */ + const char* name; /* kind name */ + const char* description; /* displayed in --help output */ +} kindOption; + +typedef struct { + /* defined by parser */ + char* name; /* name of language */ + kindOption* kinds; /* tag kinds handled by parser */ + unsigned int kindCount; /* size of `kinds' list */ + const char *const *extensions; /* list of default extensions */ + const char *const *patterns; /* list of default file name patterns */ + parserInitialize initialize; /* initialization routine, if needed */ + simpleParser parser; /* simple parser (common case) */ + rescanParser parser2; /* rescanning parser (unusual case) */ + boolean regex; /* is this a regex parser? */ + + /* used internally */ + unsigned int id; /* id assigned to language */ + boolean enabled; /* currently enabled? */ + stringList* currentPatterns; /* current list of file name patterns */ + stringList* currentExtensions; /* current list of extensions */ +} parserDefinition; + +typedef parserDefinition* (parserDefinitionFunc) (void); + +typedef struct { + size_t start; /* character index in line where match starts */ + size_t length; /* length of match */ +} regexMatch; + +typedef void (*regexCallback) (const char *line, const regexMatch *matches, unsigned int count); + +/* +* FUNCTION PROTOTYPES +*/ + +/* Each parsers' definition function is called. The routine is expected to + * return a structure allocated using parserNew(). This structure must, + * at minimum, set the `parser' field. 
+ */ +extern parserDefinitionFunc PARSER_LIST; + +/* Legacy interface */ +extern boolean includingDefineTags (void); + +/* Language processing and parsing */ +extern void makeSimpleTag (const vString* const name, kindOption* const kinds, const int kind); +extern parserDefinition* parserNew (const char* name); +extern const char *getLanguageName (const langType language); +extern langType getNamedLanguage (const char *const name); +extern langType getFileLanguage (const char *const fileName); +extern void installLanguageMapDefault (const langType language); +extern void installLanguageMapDefaults (void); +extern void clearLanguageMap (const langType language); +extern boolean removeLanguageExtensionMap (const char *const extension); +extern void addLanguageExtensionMap (const langType language, const char* extension); +extern void addLanguagePatternMap (const langType language, const char* ptrn); +extern void printLanguageMap (const langType language); +extern void printLanguageMaps (const langType language); +extern void enableLanguages (const boolean state); +extern void enableLanguage (const langType language, const boolean state); +extern void initializeParsing (void); +extern void freeParserResources (void); +extern void processLanguageDefineOption (const char *const option, const char *const parameter); +extern boolean processKindOption (const char *const option, const char *const parameter); +extern void printKindOptions (void); +extern void printLanguageKinds (const langType language); +extern void printLanguageList (void); +extern boolean parseFile (const char *const fileName); + +/* Regex interface */ +#ifdef HAVE_REGEX +extern void findRegexTags (void); +extern boolean matchRegex (const vString* const line, const langType language); +#endif +extern boolean processRegexOption (const char *const option, const char *const parameter); +extern void addLanguageRegex (const langType language, const char* const regex); +extern void addTagRegex (const langType 
language, const char* const regex, const char* const name, const char* const kinds, const char* const flags); +extern void addCallbackRegex (const langType language, const char *const regex, const char *const flags, const regexCallback callback); +extern void disableRegexKinds (const langType language); +extern boolean enableRegexKind (const langType language, const int kind, const boolean mode); +extern void printRegexKinds (const langType language, boolean indent); +extern void freeRegexResources (void); +extern void checkRegex (void); + +#endif /* _PARSE_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/parsers.h b/parsers.h new file mode 100644 index 0000000..3dcc8ae --- /dev/null +++ b/parsers.h @@ -0,0 +1,63 @@ +/* +* $Id: parsers.h 717 2009-07-07 03:40:50Z dhiebert $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* External interface to all language parsing modules. +* +* To add a new language parser, you need only modify this single source +* file to add the name of the parser definition function. 
+*/ +#ifndef _PARSERS_H +#define _PARSERS_H + +/* Add the name of any new parser definition function here */ +#define PARSER_LIST \ + AntParser, \ + AsmParser, \ + AspParser, \ + AwkParser, \ + BasicParser, \ + BetaParser, \ + CParser, \ + CppParser, \ + CsharpParser, \ + CobolParser, \ + DosBatchParser, \ + EiffelParser, \ + ErlangParser, \ + FlexParser, \ + FortranParser, \ + HtmlParser, \ + JavaParser, \ + JavaScriptParser, \ + LispParser, \ + LuaParser, \ + MakefileParser, \ + MatLabParser, \ + OcamlParser, \ + PascalParser, \ + PerlParser, \ + PhpParser, \ + PythonParser, \ + RexxParser, \ + RubyParser, \ + SchemeParser, \ + ShParser, \ + SlangParser, \ + SmlParser, \ + SqlParser, \ + TclParser, \ + TexParser, \ + VeraParser, \ + VerilogParser, \ + VhdlParser, \ + VimParser, \ + YaccParser + +#endif /* _PARSERS_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/pascal.c b/pascal.c new file mode 100644 index 0000000..9a50ba7 --- /dev/null +++ b/pascal.c @@ -0,0 +1,267 @@ +/* +* $Id: pascal.c 536 2007-06-02 06:09:00Z elliotth $ +* +* Copyright (c) 2001-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for the Pascal language, +* including some extensions for Object Pascal. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "entry.h" +#include "parse.h" +#include "read.h" +#include "vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_FUNCTION, K_PROCEDURE +} pascalKind; + +static kindOption PascalKinds [] = { + { TRUE, 'f', "function", "functions"}, + { TRUE, 'p', "procedure", "procedures"} +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static void createPascalTag ( + tagEntryInfo* const tag, const vString* const name, const int kind) +{ + if (PascalKinds [kind].enabled && name != NULL && vStringLength (name) > 0) + { + initTagEntry (tag, vStringValue (name)); + tag->kindName = PascalKinds [kind].name; + tag->kind = PascalKinds [kind].letter; + } + else + initTagEntry (tag, NULL); +} + +static void makePascalTag (const tagEntryInfo* const tag) +{ + if (tag->name != NULL) + makeTagEntry (tag); +} + +static const unsigned char* dbp; + +#define starttoken(c) (isalpha ((int) c) || (int) c == '_') +#define intoken(c) (isalnum ((int) c) || (int) c == '_' || (int) c == '.') +#define endtoken(c) (! intoken (c) && ! isdigit ((int) c)) + +static boolean tail (const char *cp) +{ + boolean result = FALSE; + register int len = 0; + + while (*cp != '\0' && tolower ((int) *cp) == tolower ((int) dbp [len])) + cp++, len++; + if (*cp == '\0' && !intoken (dbp [len])) + { + dbp += len; + result = TRUE; + } + return result; +} + +/* Algorithm adapted from from GNU etags. + * Locates tags for procedures & functions. Doesn't do any type- or + * var-definitions. It does look for the keyword "extern" or "forward" + * immediately following the procedure statement; if found, the tag is + * skipped. 
+ */ +static void findPascalTags (void) +{ + vString *name = vStringNew (); + tagEntryInfo tag; + pascalKind kind = K_FUNCTION; + /* each of these flags is TRUE iff: */ + boolean incomment = FALSE; /* point is inside a comment */ + int comment_char = '\0'; /* type of current comment */ + boolean inquote = FALSE; /* point is inside '..' string */ + boolean get_tagname = FALSE;/* point is after PROCEDURE/FUNCTION + keyword, so next item = potential tag */ + boolean found_tag = FALSE; /* point is after a potential tag */ + boolean inparms = FALSE; /* point is within parameter-list */ + boolean verify_tag = FALSE; + /* point has passed the parm-list, so the next token will determine + * whether this is a FORWARD/EXTERN to be ignored, or whether it is a + * real tag + */ + + dbp = fileReadLine (); + while (dbp != NULL) + { + int c = *dbp++; + + if (c == '\0') /* if end of line */ + { + dbp = fileReadLine (); + if (dbp == NULL || *dbp == '\0') + continue; + if (!((found_tag && verify_tag) || get_tagname)) + c = *dbp++; + /* only if don't need *dbp pointing to the beginning of + * the name of the procedure or function + */ + } + if (incomment) + { + if (comment_char == '{' && c == '}') + incomment = FALSE; + else if (comment_char == '(' && c == '*' && *dbp == ')') + { + dbp++; + incomment = FALSE; + } + continue; + } + else if (inquote) + { + if (c == '\'') + inquote = FALSE; + continue; + } + else switch (c) + { + case '\'': + inquote = TRUE; /* found first quote */ + continue; + case '{': /* found open { comment */ + incomment = TRUE; + comment_char = c; + continue; + case '(': + if (*dbp == '*') /* found open (* comment */ + { + incomment = TRUE; + comment_char = c; + dbp++; + } + else if (found_tag) /* found '(' after tag, i.e., parm-list */ + inparms = TRUE; + continue; + case ')': /* end of parms list */ + if (inparms) + inparms = FALSE; + continue; + case ';': + if (found_tag && !inparms) /* end of proc or fn stmt */ + { + verify_tag = TRUE; + break; + } + 
continue; + } + if (found_tag && verify_tag && *dbp != ' ') + { + /* check if this is an "extern" declaration */ + if (*dbp == '\0') + continue; + if (tolower ((int) *dbp == 'e')) + { + if (tail ("extern")) /* superfluous, really! */ + { + found_tag = FALSE; + verify_tag = FALSE; + } + } + else if (tolower ((int) *dbp) == 'f') + { + if (tail ("forward")) /* check for forward reference */ + { + found_tag = FALSE; + verify_tag = FALSE; + } + } + if (found_tag && verify_tag) /* not external proc, so make tag */ + { + found_tag = FALSE; + verify_tag = FALSE; + makePascalTag (&tag); + continue; + } + } + if (get_tagname) /* grab name of proc or fn */ + { + const unsigned char *cp; + + if (*dbp == '\0') + continue; + + /* grab block name */ + while (isspace ((int) *dbp)) + ++dbp; + for (cp = dbp ; *cp != '\0' && !endtoken (*cp) ; cp++) + continue; + vStringNCopyS (name, (const char*) dbp, cp - dbp); + createPascalTag (&tag, name, kind); + dbp = cp; /* set dbp to e-o-token */ + get_tagname = FALSE; + found_tag = TRUE; + /* and proceed to check for "extern" */ + } + else if (!incomment && !inquote && !found_tag) + { + switch (tolower ((int) c)) + { + case 'c': + if (tail ("onstructor")) + { + get_tagname = TRUE; + kind = K_PROCEDURE; + } + break; + case 'd': + if (tail ("estructor")) + { + get_tagname = TRUE; + kind = K_PROCEDURE; + } + break; + case 'p': + if (tail ("rocedure")) + { + get_tagname = TRUE; + kind = K_PROCEDURE; + } + break; + case 'f': + if (tail ("unction")) + { + get_tagname = TRUE; + kind = K_FUNCTION; + } + break; + } + } /* while not eof */ + } + vStringDelete (name); +} + +extern parserDefinition* PascalParser (void) +{ + static const char *const extensions [] = { "p", "pas", NULL }; + parserDefinition* def = parserNew ("Pascal"); + def->extensions = extensions; + def->kinds = PascalKinds; + def->kindCount = KIND_COUNT (PascalKinds); + def->parser = findPascalTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/perl.c b/perl.c 
new file mode 100644 index 0000000..7c3e932 --- /dev/null +++ b/perl.c @@ -0,0 +1,382 @@ +/* +* $Id: perl.c 601 2007-08-02 04:45:16Z perlguy0 $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for PERL language +* files. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "entry.h" +#include "options.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +#define TRACE_PERL_C 0 +#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_NONE = -1, + K_CONSTANT, + K_FORMAT, + K_LABEL, + K_PACKAGE, + K_SUBROUTINE, + K_SUBROUTINE_DECLARATION +} perlKind; + +static kindOption PerlKinds [] = { + { TRUE, 'c', "constant", "constants" }, + { TRUE, 'f', "format", "formats" }, + { TRUE, 'l', "label", "labels" }, + { TRUE, 'p', "package", "packages" }, + { TRUE, 's', "subroutine", "subroutines" }, + { FALSE, 'd', "subroutine declaration", "subroutine declarations" }, +}; + +/* +* FUNCTION DEFINITIONS +*/ + +static boolean isIdentifier1 (int c) +{ + return (boolean) (isalpha (c) || c == '_'); +} + +static boolean isIdentifier (int c) +{ + return (boolean) (isalnum (c) || c == '_'); +} + +static boolean isPodWord (const char *word) +{ + boolean result = FALSE; + if (isalpha (*word)) + { + const char *const pods [] = { + "head1", "head2", "head3", "head4", "over", "item", "back", + "pod", "begin", "end", "for" + }; + const size_t count = sizeof (pods) / sizeof (pods [0]); + const char *white = strpbrk (word, " \t"); + const size_t len = (white!=NULL) ? (size_t)(white-word) : strlen (word); + char *const id = (char*) eMalloc (len + 1); + size_t i; + strncpy (id, word, len); + id [len] = '\0'; + for (i = 0 ; i < count && ! 
result ; ++i) + { + if (strcmp (id, pods [i]) == 0) + result = TRUE; + } + eFree (id); + } + return result; +} + +/* + * Perl subroutine declaration may look like one of the following: + * + * sub abc; + * sub abc :attr; + * sub abc (proto); + * sub abc (proto) :attr; + * + * Note that there may be more than one attribute. Attributes may + * have things in parentheses (they look like arguments). Anything + * inside of those parentheses goes. Prototypes may contain semi-colons. + * The matching end when we encounter (outside of any parentheses) either + * a semi-colon (that'd be a declaration) or an left curly brace + * (definition). + * + * This is pretty complicated parsing (plus we all know that only perl can + * parse Perl), so we are only promising best effort here. + * + * If we can't determine what this is (due to a file ending, for example), + * we will return FALSE. + */ +static boolean isSubroutineDeclaration (const unsigned char *cp) +{ + boolean attr = FALSE; + int nparens = 0; + + do { + for ( ; *cp; ++cp) { +SUB_DECL_SWITCH: + switch (*cp) { + case ':': + if (nparens) + break; + else if (TRUE == attr) + return FALSE; /* Invalid attribute name */ + else + attr = TRUE; + break; + case '(': + ++nparens; + break; + case ')': + --nparens; + break; + case ' ': + case '\t': + break; + case ';': + if (!nparens) + return TRUE; + case '{': + if (!nparens) + return FALSE; + default: + if (attr) { + if (isIdentifier1(*cp)) { + cp++; + while (isIdentifier (*cp)) + cp++; + attr = FALSE; + goto SUB_DECL_SWITCH; /* Instead of --cp; */ + } else { + return FALSE; + } + } else if (nparens) { + break; + } else { + return FALSE; + } + } + } + } while (NULL != (cp = fileReadLine ())); + + return FALSE; +} + +/* Algorithm adapted from from GNU etags. 
+ * Perl support by Bart Robinson + * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/ + */ +static void findPerlTags (void) +{ + vString *name = vStringNew (); + vString *package = NULL; + boolean skipPodDoc = FALSE; + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + boolean spaceRequired = FALSE; + boolean qualified = FALSE; + const unsigned char *cp = line; + perlKind kind = K_NONE; + tagEntryInfo e; + + if (skipPodDoc) + { + if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0) + skipPodDoc = FALSE; + continue; + } + else if (line [0] == '=') + { + skipPodDoc = isPodWord ((const char*)line + 1); + continue; + } + else if (strcmp ((const char*) line, "__DATA__") == 0) + break; + else if (strcmp ((const char*) line, "__END__") == 0) + break; + else if (line [0] == '#') + continue; + + while (isspace (*cp)) + cp++; + + if (strncmp((const char*) cp, "sub", (size_t) 3) == 0) + { + TRACE("this looks like a sub\n"); + cp += 3; + kind = K_SUBROUTINE; + spaceRequired = TRUE; + qualified = TRUE; + } + else if (strncmp((const char*) cp, "use", (size_t) 3) == 0) + { + cp += 3; + if (!isspace(*cp)) + continue; + while (*cp && isspace (*cp)) + ++cp; + if (strncmp((const char*) cp, "constant", (size_t) 8) != 0) + continue; + cp += 8; + kind = K_CONSTANT; + spaceRequired = TRUE; + qualified = TRUE; + } + else if (strncmp((const char*) cp, "package", (size_t) 7) == 0) + { + /* This will point to space after 'package' so that a tag + can be made */ + const unsigned char *space = cp += 7; + + if (package == NULL) + package = vStringNew (); + else + vStringClear (package); + while (isspace (*cp)) + cp++; + while ((int) *cp != ';' && !isspace ((int) *cp)) + { + vStringPut (package, (int) *cp); + cp++; + } + vStringCatS (package, "::"); + + cp = space; /* Rewind */ + kind = K_PACKAGE; + spaceRequired = TRUE; + qualified = TRUE; + } + else if (strncmp((const char*) cp, "format", (size_t) 6) == 0) + { + cp += 6; + kind = K_FORMAT; + 
spaceRequired = TRUE; + qualified = TRUE; + } + else + { + if (isIdentifier1 (*cp)) + { + const unsigned char *p = cp; + while (isIdentifier (*p)) + ++p; + while (isspace (*p)) + ++p; + if ((int) *p == ':' && (int) *(p + 1) != ':') + kind = K_LABEL; + } + } + if (kind != K_NONE) + { + TRACE("cp0: %s\n", (const char *) cp); + if (spaceRequired && *cp && !isspace (*cp)) + continue; + + TRACE("cp1: %s\n", (const char *) cp); + while (isspace (*cp)) + cp++; + + while (!*cp || '#' == *cp) { /* Gobble up empty lines + and comments */ + cp = fileReadLine (); + if (!cp) + goto END_MAIN_WHILE; + while (isspace (*cp)) + cp++; + } + + while (isIdentifier (*cp) || (K_PACKAGE == kind && ':' == *cp)) + { + vStringPut (name, (int) *cp); + cp++; + } + + if (K_FORMAT == kind && + vStringLength (name) == 0 && /* cp did not advance */ + '=' == *cp) + { + /* format's name is optional. If it's omitted, 'STDOUT' + is assumed. */ + vStringCatS (name, "STDOUT"); + } + + vStringTerminate (name); + TRACE("name: %s\n", name->buffer); + + if (0 == vStringLength(name)) { + vStringClear(name); + continue; + } + + if (K_SUBROUTINE == kind) + { + /* + * isSubroutineDeclaration() may consume several lines. So + * we record line positions. 
+ */ + initTagEntry(&e, vStringValue(name)); + + if (TRUE == isSubroutineDeclaration(cp)) { + if (TRUE == PerlKinds[K_SUBROUTINE_DECLARATION].enabled) { + kind = K_SUBROUTINE_DECLARATION; + } else { + vStringClear (name); + continue; + } + } + + e.kind = PerlKinds[kind].letter; + e.kindName = PerlKinds[kind].name; + + makeTagEntry(&e); + + if (Option.include.qualifiedTags && qualified && + package != NULL && vStringLength (package) > 0) + { + vString *const qualifiedName = vStringNew (); + vStringCopy (qualifiedName, package); + vStringCat (qualifiedName, name); + e.name = vStringValue(qualifiedName); + makeTagEntry(&e); + vStringDelete (qualifiedName); + } + } else if (vStringLength (name) > 0) + { + makeSimpleTag (name, PerlKinds, kind); + if (Option.include.qualifiedTags && qualified && + K_PACKAGE != kind && + package != NULL && vStringLength (package) > 0) + { + vString *const qualifiedName = vStringNew (); + vStringCopy (qualifiedName, package); + vStringCat (qualifiedName, name); + makeSimpleTag (qualifiedName, PerlKinds, kind); + vStringDelete (qualifiedName); + } + } + vStringClear (name); + } + } + +END_MAIN_WHILE: + vStringDelete (name); + if (package != NULL) + vStringDelete (package); +} + +extern parserDefinition* PerlParser (void) +{ + static const char *const extensions [] = { "pl", "pm", "plx", "perl", NULL }; + parserDefinition* def = parserNew ("Perl"); + def->kinds = PerlKinds; + def->kindCount = KIND_COUNT (PerlKinds); + def->extensions = extensions; + def->parser = findPerlTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/php.c b/php.c new file mode 100644 index 0000000..0dd60c5 --- /dev/null +++ b/php.c @@ -0,0 +1,237 @@ +/* +* $Id: php.c 624 2007-09-15 22:53:31Z jafl $ +* +* Copyright (c) 2000, Jesus Castagnetto +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* This module contains functions for generating tags for the PHP web page +* scripting language. Only recognizes functions and classes, not methods or +* variables. +* +* Parsing PHP defines by Pavel Hlousek , Apr 2003. +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "parse.h" +#include "read.h" +#include "vstring.h" + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_CLASS, K_DEFINE, K_FUNCTION, K_VARIABLE +} phpKind; + +#if 0 +static kindOption PhpKinds [] = { + { TRUE, 'c', "class", "classes" }, + { TRUE, 'd', "define", "constant definitions" }, + { TRUE, 'f', "function", "functions" }, + { TRUE, 'v', "variable", "variables" } +}; +#endif + +/* +* FUNCTION DEFINITIONS +*/ + +/* JavaScript patterns are duplicated in jscript.c */ + +/* + * Cygwin doesn't support non-ASCII characters in character classes. + * This isn't a good solution to the underlying problem, because we're still + * making assumptions about the character encoding. + * Really, these regular expressions need to concentrate on what marks the + * end of an identifier, and we need something like iconv to take into + * account the user's locale (or an override on the command-line.) 
+ */ +#ifdef __CYGWIN__ +#define ALPHA "[:alpha:]" +#define ALNUM "[:alnum:]" +#else +#define ALPHA "A-Za-z\x7f-\xff" +#define ALNUM "0-9A-Za-z\x7f-\xff" +#endif + +static void installPHPRegex (const langType language) +{ + addTagRegex(language, "(^|[ \t])class[ \t]+([" ALPHA "_][" ALNUM "_]*)", + "\\2", "c,class,classes", NULL); + addTagRegex(language, "(^|[ \t])interface[ \t]+([" ALPHA "_][" ALNUM "_]*)", + "\\2", "i,interface,interfaces", NULL); + addTagRegex(language, "(^|[ \t])define[ \t]*\\([ \t]*['\"]?([" ALPHA "_][" ALNUM "_]*)", + "\\2", "d,define,constant definitions", NULL); + addTagRegex(language, "(^|[ \t])function[ \t]+&?[ \t]*([" ALPHA "_][" ALNUM "_]*)", + "\\2", "f,function,functions", NULL); + addTagRegex(language, "(^|[ \t])(\\$|::\\$|\\$this->)([" ALPHA "_][" ALNUM "_]*)[ \t]*=", + "\\3", "v,variable,variables", NULL); + addTagRegex(language, "(^|[ \t])(var|public|protected|private|static)[ \t]+\\$([" ALPHA "_][" ALNUM "_]*)[ \t]*[=;]", + "\\3", "v,variable,variables", NULL); + + /* function regex is covered by PHP regex */ + addTagRegex (language, "(^|[ \t])([A-Za-z0-9_]+)[ \t]*[=:][ \t]*function[ \t]*\\(", + "\\2", "j,jsfunction,javascript functions", NULL); + addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(", + "\\2.\\3", "j,jsfunction,javascript functions", NULL); + addTagRegex (language, "(^|[ \t])([A-Za-z0-9_.]+)\\.([A-Za-z0-9_]+)[ \t]*=[ \t]*function[ \t]*\\(", + "\\3", "j,jsfunction,javascript functions", NULL); +} + +/* Create parser definition structure */ +extern parserDefinition* PhpParser (void) +{ + static const char *const extensions [] = { "php", "php3", "phtml", NULL }; + parserDefinition* def = parserNew ("PHP"); + def->extensions = extensions; + def->initialize = installPHPRegex; + def->regex = TRUE; + return def; +} + +#if 0 + +static boolean isLetter(const int c) +{ + return (boolean)(isalpha(c) || (c >= 127 && c <= 255)); +} + +static boolean isVarChar1(const int c) +{ + 
return (boolean)(isLetter (c) || c == '_'); +} + +static boolean isVarChar(const int c) +{ + return (boolean)(isVarChar1 (c) || isdigit (c)); +} + +static void findPhpTags (void) +{ + vString *name = vStringNew (); + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + const char* f; + + while (isspace (*cp)) + cp++; + + if (*(const char*)cp == '$' && isVarChar1 (*(const char*)(cp+1))) + { + cp += 1; + vStringClear (name); + while (isVarChar ((int) *cp)) + { + vStringPut (name, (int) *cp); + ++cp; + } + while (isspace ((int) *cp)) + ++cp; + if (*(const char*) cp == '=') + { + vStringTerminate (name); + makeSimpleTag (name, PhpKinds, K_VARIABLE); + vStringClear (name); + } + } + else if ((f = strstr ((const char*) cp, "function")) != NULL && + (f == (const char*) cp || isspace ((int) f [-1])) && + isspace ((int) f [8])) + { + cp = ((const unsigned char *) f) + 8; + + while (isspace ((int) *cp)) + ++cp; + + if (*cp == '&') /* skip reference character and following whitespace */ + { + cp++; + + while (isspace ((int) *cp)) + ++cp; + } + + vStringClear (name); + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, PhpKinds, K_FUNCTION); + vStringClear (name); + } + else if (strncmp ((const char*) cp, "class", (size_t) 5) == 0 && + isspace ((int) cp [5])) + { + cp += 5; + + while (isspace ((int) *cp)) + ++cp; + vStringClear (name); + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, PhpKinds, K_CLASS); + vStringClear (name); + } + else if (strncmp ((const char*) cp, "define", (size_t) 6) == 0 && + ! isalnum ((int) cp [6])) + { + cp += 6; + + while (isspace ((int) *cp)) + ++cp; + if (*cp != '(') + continue; + ++cp; + + while (isspace ((int) *cp)) + ++cp; + if ((*cp == '\'') || (*cp == '"')) + ++cp; + else if (! 
((*cp == '_') || isalnum ((int) *cp))) + continue; + + vStringClear (name); + while (isalnum ((int) *cp) || *cp == '_') + { + vStringPut (name, (int) *cp); + ++cp; + } + vStringTerminate (name); + makeSimpleTag (name, PhpKinds, K_DEFINE); + vStringClear (name); + } + } + vStringDelete (name); +} + +extern parserDefinition* PhpParser (void) +{ + static const char *const extensions [] = { "php", "php3", "phtml", NULL }; + parserDefinition* def = parserNew ("PHP"); + def->kinds = PhpKinds; + def->kindCount = KIND_COUNT (PhpKinds); + def->extensions = extensions; + def->parser = findPhpTags; + return def; +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/python.c b/python.c new file mode 100644 index 0000000..5fdf31b --- /dev/null +++ b/python.c @@ -0,0 +1,771 @@ +/* +* $Id: python.c 720 2009-07-07 03:55:23Z dhiebert $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Python language +* files. 
+*/ +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "entry.h" +#include "options.h" +#include "read.h" +#include "main.h" +#include "vstring.h" +#include "routines.h" +#include "debug.h" + +/* +* DATA DECLARATIONS +*/ +typedef struct NestingLevel NestingLevel; +typedef struct NestingLevels NestingLevels; + +struct NestingLevel +{ + int indentation; + vString *name; + int type; +}; + +struct NestingLevels +{ + NestingLevel *levels; + int n; /* number of levels in use */ + int allocated; +}; + +typedef enum { + K_CLASS, K_FUNCTION, K_MEMBER, K_VARIABLE, K_IMPORT +} pythonKind; + +/* +* DATA DEFINITIONS +*/ +static kindOption PythonKinds[] = { + {TRUE, 'c', "class", "classes"}, + {TRUE, 'f', "function", "functions"}, + {TRUE, 'm', "member", "class members"}, + {TRUE, 'v', "variable", "variables"}, + {TRUE, 'i', "namespace", "imports"} +}; + +static char const * const singletriple = "'''"; +static char const * const doubletriple = "\"\"\""; + +/* +* FUNCTION DEFINITIONS +*/ + +static NestingLevels *nestingLevelsNew (void) +{ + NestingLevels *nls = xCalloc (1, NestingLevels); + return nls; +} + +static void nestingLevelsFree (NestingLevels *nls) +{ + int i; + for (i = 0; i < nls->allocated; i++) + vStringDelete(nls->levels[i].name); + if (nls->levels) eFree(nls->levels); + eFree(nls); +} + +static void nestingLevelsPush (NestingLevels *nls, + const vString *name, int type) +{ + NestingLevel *nl = NULL; + + if (nls->n >= nls->allocated) + { + nls->allocated++; + nls->levels = xRealloc(nls->levels, + nls->allocated, NestingLevel); + nls->levels[nls->n].name = vStringNew(); + } + nl = &nls->levels[nls->n]; + nls->n++; + + vStringCopy(nl->name, name); + nl->type = type; +} + +#if 0 +static NestingLevel *nestingLevelsGetCurrent (NestingLevels *nls) +{ + Assert (nls != NULL); + + if (nls->n < 1) + return NULL; + + return &nls->levels[nls->n - 1]; +} + +static void nestingLevelsPop (NestingLevels *nls) +{ + const NestingLevel 
*nl = nestingLevelsGetCurrent(nls); + + Assert (nl != NULL); + vStringClear(nl->name); + nls->n--; +} +#endif + +static boolean isIdentifierFirstCharacter (int c) +{ + return (boolean) (isalpha (c) || c == '_'); +} + +static boolean isIdentifierCharacter (int c) +{ + return (boolean) (isalnum (c) || c == '_'); +} + +/* Given a string with the contents of a line directly after the "def" keyword, + * extract all relevant information and create a tag. + */ +static void makeFunctionTag (vString *const function, + vString *const parent, int is_class_parent, const char *arglist __unused__) +{ + tagEntryInfo tag; + initTagEntry (&tag, vStringValue (function)); + + tag.kindName = "function"; + tag.kind = 'f'; + /* tag.extensionFields.arglist = arglist; */ + + if (vStringLength (parent) > 0) + { + if (is_class_parent) + { + tag.kindName = "member"; + tag.kind = 'm'; + tag.extensionFields.scope [0] = "class"; + tag.extensionFields.scope [1] = vStringValue (parent); + } + else + { + tag.extensionFields.scope [0] = "function"; + tag.extensionFields.scope [1] = vStringValue (parent); + } + } + + /* If a function starts with __, we mark it as file scope. + * FIXME: What is the proper way to signal such attributes? + * TODO: What does functions/classes starting with _ and __ mean in python? + */ + if (strncmp (vStringValue (function), "__", 2) == 0 && + strcmp (vStringValue (function), "__init__") != 0) + { + tag.extensionFields.access = "private"; + tag.isFileScope = TRUE; + } + else + { + tag.extensionFields.access = "public"; + } + makeTagEntry (&tag); +} + +/* Given a string with the contents of the line directly after the "class" + * keyword, extract all necessary information and create a tag. 
+ */ +static void makeClassTag (vString *const class, vString *const inheritance, + vString *const parent, int is_class_parent) +{ + tagEntryInfo tag; + initTagEntry (&tag, vStringValue (class)); + tag.kindName = "class"; + tag.kind = 'c'; + if (vStringLength (parent) > 0) + { + if (is_class_parent) + { + tag.extensionFields.scope [0] = "class"; + tag.extensionFields.scope [1] = vStringValue (parent); + } + else + { + tag.extensionFields.scope [0] = "function"; + tag.extensionFields.scope [1] = vStringValue (parent); + } + } + tag.extensionFields.inheritance = vStringValue (inheritance); + makeTagEntry (&tag); +} + +static void makeVariableTag (vString *const var, vString *const parent) +{ + tagEntryInfo tag; + initTagEntry (&tag, vStringValue (var)); + tag.kindName = "variable"; + tag.kind = 'v'; + if (vStringLength (parent) > 0) + { + tag.extensionFields.scope [0] = "class"; + tag.extensionFields.scope [1] = vStringValue (parent); + } + makeTagEntry (&tag); +} + +/* Skip a single or double quoted string. */ +static const char *skipString (const char *cp) +{ + const char *start = cp; + int escaped = 0; + for (cp++; *cp; cp++) + { + if (escaped) + escaped--; + else if (*cp == '\\') + escaped++; + else if (*cp == *start) + return cp + 1; + } + return cp; +} + +/* Skip everything up to an identifier start. */ +static const char *skipEverything (const char *cp) +{ + for (; *cp; cp++) + { + if (*cp == '"' || *cp == '\'') + { + cp = skipString(cp); + if (!*cp) break; + } + if (isIdentifierFirstCharacter ((int) *cp)) + return cp; + } + return cp; +} + +/* Skip an identifier. 
*/ +static const char *skipIdentifier (const char *cp) +{ + while (isIdentifierCharacter ((int) *cp)) + cp++; + return cp; +} + +static const char *findDefinitionOrClass (const char *cp) +{ + while (*cp) + { + cp = skipEverything (cp); + if (!strncmp(cp, "def", 3) || !strncmp(cp, "class", 5) || + !strncmp(cp, "cdef", 4) || !strncmp(cp, "cpdef", 5)) + { + return cp; + } + cp = skipIdentifier (cp); + } + return NULL; +} + +static const char *skipSpace (const char *cp) +{ + while (isspace ((int) *cp)) + ++cp; + return cp; +} + +/* Starting at ''cp'', parse an identifier into ''identifier''. */ +static const char *parseIdentifier (const char *cp, vString *const identifier) +{ + vStringClear (identifier); + while (isIdentifierCharacter ((int) *cp)) + { + vStringPut (identifier, (int) *cp); + ++cp; + } + vStringTerminate (identifier); + return cp; +} + +static void parseClass (const char *cp, vString *const class, + vString *const parent, int is_class_parent) +{ + vString *const inheritance = vStringNew (); + vStringClear (inheritance); + cp = parseIdentifier (cp, class); + cp = skipSpace (cp); + if (*cp == '(') + { + ++cp; + while (*cp != ')') + { + if (*cp == '\0') + { + /* Closing parenthesis can be in follow up line. */ + cp = (const char *) fileReadLine (); + if (!cp) break; + vStringPut (inheritance, ' '); + continue; + } + vStringPut (inheritance, *cp); + ++cp; + } + vStringTerminate (inheritance); + } + makeClassTag (class, inheritance, parent, is_class_parent); + vStringDelete (inheritance); +} + +static void parseImports (const char *cp) +{ + const char *pos; + vString *name, *name_next; + + cp = skipEverything (cp); + + if ((pos = strstr (cp, "import")) == NULL) + return; + + cp = pos + 6; + + /* continue only if there is some space between the keyword and the identifier */ + if (! 
isspace (*cp)) + return; + + cp++; + cp = skipSpace (cp); + + name = vStringNew (); + name_next = vStringNew (); + + cp = skipEverything (cp); + while (*cp) + { + cp = parseIdentifier (cp, name); + + cp = skipEverything (cp); + /* we parse the next possible import statement as well to be able to ignore 'foo' in + * 'import foo as bar' */ + parseIdentifier (cp, name_next); + + /* take the current tag only if the next one is not "as" */ + if (strcmp (vStringValue (name_next), "as") != 0 && + strcmp (vStringValue (name), "as") != 0) + { + makeSimpleTag (name, PythonKinds, K_IMPORT); + } + } + vStringDelete (name); + vStringDelete (name_next); +} + +/* modified from get.c getArglistFromStr(). + * warning: terminates rest of string past arglist! + * note: does not ignore brackets inside strings! */ +static char *parseArglist(const char *buf) +{ + char *start, *end; + int level; + if (NULL == buf) + return NULL; + if (NULL == (start = strchr(buf, '('))) + return NULL; + for (level = 1, end = start + 1; level > 0; ++end) + { + if ('\0' == *end) + break; + else if ('(' == *end) + ++ level; + else if (')' == *end) + -- level; + } + *end = '\0'; + return strdup(start); +} + +static void parseFunction (const char *cp, vString *const def, + vString *const parent, int is_class_parent) +{ + char *arglist; + + cp = parseIdentifier (cp, def); + arglist = parseArglist (cp); + makeFunctionTag (def, parent, is_class_parent, arglist); + eFree (arglist); +} + +/* Get the combined name of a nested symbol. Classes are separated with ".", + * functions with "/". 
For example this code: + * class MyClass: + * def myFunction: + * def SubFunction: + * class SubClass: + * def Method: + * pass + * Would produce this string: + * MyClass.MyFunction/SubFunction/SubClass.Method + */ +static boolean constructParentString(NestingLevels *nls, int indent, + vString *result) +{ + int i; + NestingLevel *prev = NULL; + int is_class = FALSE; + vStringClear (result); + for (i = 0; i < nls->n; i++) + { + NestingLevel *nl = nls->levels + i; + if (indent <= nl->indentation) + break; + if (prev) + { + vStringCatS(result, "."); /* make Geany symbol list grouping work properly */ +/* + if (prev->type == K_CLASS) + vStringCatS(result, "."); + else + vStringCatS(result, "/"); +*/ + } + vStringCat(result, nl->name); + is_class = (nl->type == K_CLASS); + prev = nl; + } + return is_class; +} + +/* Check whether parent's indentation level is higher than the current level and + * if so, remove it. + */ +static void checkParent(NestingLevels *nls, int indent, vString *parent) +{ + int i; + NestingLevel *n; + + for (i = 0; i < nls->n; i++) + { + n = nls->levels + i; + /* is there a better way to compare two vStrings? */ + if (strcmp(vStringValue(parent), vStringValue(n->name)) == 0) + { + if (n && indent <= n->indentation) + { + /* remove this level by clearing its name */ + vStringClear(n->name); + } + break; + } + } +} + +static void addNestingLevel(NestingLevels *nls, int indentation, + const vString *name, boolean is_class) +{ + int i; + NestingLevel *nl = NULL; + + for (i = 0; i < nls->n; i++) + { + nl = nls->levels + i; + if (indentation <= nl->indentation) break; + } + if (i == nls->n) + { + nestingLevelsPush(nls, name, 0); + nl = nls->levels + i; + } + else + { /* reuse existing slot */ + nls->n = i + 1; + vStringCopy(nl->name, name); + } + nl->indentation = indentation; + nl->type = is_class ? K_CLASS : !K_CLASS; +} + +/* Return a pointer to the start of the next triple string, or NULL. 
Store + * the kind of triple string in "which" if the return is not NULL. + */ +static char const *find_triple_start(char const *string, char const **which) +{ + char const *cp = string; + + for (; *cp; cp++) + { + if (*cp == '"' || *cp == '\'') + { + if (strncmp(cp, doubletriple, 3) == 0) + { + *which = doubletriple; + return cp; + } + if (strncmp(cp, singletriple, 3) == 0) + { + *which = singletriple; + return cp; + } + cp = skipString(cp); + if (!*cp) break; + } + } + return NULL; +} + +/* Find the end of a triple string as pointed to by "which", and update "which" + * with any other triple strings following in the given string. + */ +static void find_triple_end(char const *string, char const **which) +{ + char const *s = string; + while (1) + { + /* Check if the string ends in the same line. */ + s = strstr (s, *which); + if (!s) break; + s += 3; + *which = NULL; + /* If yes, check if another one starts in the same line. */ + s = find_triple_start(s, which); + if (!s) break; + s += 3; + } +} + +static const char *findVariable(const char *line) +{ + /* Parse global and class variable names (C.x) from assignment statements. + * Object attributes (obj.x) are ignored. + * Assignment to a tuple 'x, y = 2, 3' not supported. + * TODO: ignore duplicate tags from reassignment statements. 
*/ + const char *cp, *sp, *eq, *start; + + cp = strstr(line, "="); + if (!cp) + return NULL; + eq = cp + 1; + while (*eq) + { + if (*eq == '=') + return NULL; /* ignore '==' operator and 'x=5,y=6)' function lines */ + if (*eq == '(' || *eq == '#') + break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of line */ + eq++; + } + + /* go backwards to the start of the line, checking we have valid chars */ + start = cp - 1; + while (start >= line && isspace ((int) *start)) + --start; + while (start >= line && isIdentifierCharacter ((int) *start)) + --start; + if (!isIdentifierFirstCharacter(*(start + 1))) + return NULL; + sp = start; + while (sp >= line && isspace ((int) *sp)) + --sp; + if ((sp + 1) != line) /* the line isn't a simple variable assignment */ + return NULL; + /* the line is valid, parse the variable name */ + ++start; + return start; +} + +/* Skip type declaration that optionally follows a cdef/cpdef */ +static const char *skipTypeDecl (const char *cp, boolean *is_class) +{ + const char *lastStart = cp, *ptr = cp; + int loopCount = 0; + ptr = skipSpace(cp); + if (!strncmp("extern", ptr, 6)) { + ptr += 6; + ptr = skipSpace(ptr); + if (!strncmp("from", ptr, 4)) { return NULL; } + } + if (!strncmp("class", ptr, 5)) { + ptr += 5 ; + *is_class = TRUE; + ptr = skipSpace(ptr); + return ptr; + } + /* limit so that we don't pick off "int item=obj()" */ + while (*ptr && loopCount++ < 2) { + while (*ptr && *ptr != '=' && *ptr != '(' && !isspace(*ptr)) ptr++; + if (!*ptr || *ptr == '=') return NULL; + if (*ptr == '(') { + return lastStart; /* if we stopped on a '(' we are done */ + } + ptr = skipSpace(ptr); + lastStart = ptr; + while (*lastStart == '*') lastStart++; /* cdef int *identifier */ + } + return NULL; +} + +static void findPythonTags (void) +{ + vString *const continuation = vStringNew (); + vString *const name = vStringNew (); + vString *const parent = vStringNew(); + + NestingLevels *const nesting_levels = nestingLevelsNew(); + + const char 
*line; + int line_skip = 0; + char const *longStringLiteral = NULL; + + while ((line = (const char *) fileReadLine ()) != NULL) + { + const char *cp = line, *candidate; + char const *longstring; + char const *keyword, *variable; + int indent; + + cp = skipSpace (cp); + + if (*cp == '\0') /* skip blank line */ + continue; + + /* Skip comment if we are not inside a multi-line string. */ + if (*cp == '#' && !longStringLiteral) + continue; + + /* Deal with line continuation. */ + if (!line_skip) vStringClear(continuation); + vStringCatS(continuation, line); + vStringStripTrailing(continuation); + if (vStringLast(continuation) == '\\') + { + vStringChop(continuation); + vStringCatS(continuation, " "); + line_skip = 1; + continue; + } + cp = line = vStringValue(continuation); + cp = skipSpace (cp); + indent = cp - line; + line_skip = 0; + + checkParent(nesting_levels, indent, parent); + + /* Deal with multiline string ending. */ + if (longStringLiteral) + { + find_triple_end(cp, &longStringLiteral); + continue; + } + + /* Deal with multiline string start. */ + longstring = find_triple_start(cp, &longStringLiteral); + if (longstring) + { + longstring += 3; + find_triple_end(longstring, &longStringLiteral); + /* We don't parse for any tags in the rest of the line. */ + continue; + } + + /* Deal with def and class keywords. 
*/ + keyword = findDefinitionOrClass (cp); + if (keyword) + { + boolean found = FALSE; + boolean is_class = FALSE; + if (!strncmp (keyword, "def ", 4)) + { + cp = skipSpace (keyword + 3); + found = TRUE; + } + else if (!strncmp (keyword, "class ", 6)) + { + cp = skipSpace (keyword + 5); + found = TRUE; + is_class = TRUE; + } + else if (!strncmp (keyword, "cdef ", 5)) + { + cp = skipSpace(keyword + 4); + candidate = skipTypeDecl (cp, &is_class); + if (candidate) + { + found = TRUE; + cp = candidate; + } + + } + else if (!strncmp (keyword, "cpdef ", 6)) + { + cp = skipSpace(keyword + 5); + candidate = skipTypeDecl (cp, &is_class); + if (candidate) + { + found = TRUE; + cp = candidate; + } + } + + if (found) + { + boolean is_parent_class; + + is_parent_class = + constructParentString(nesting_levels, indent, parent); + + if (is_class) + parseClass (cp, name, parent, is_parent_class); + else + parseFunction(cp, name, parent, is_parent_class); + + addNestingLevel(nesting_levels, indent, name, is_class); + } + } + /* Find global and class variables */ + variable = findVariable(line); + if (variable) + { + const char *start = variable; + boolean parent_is_class; + + vStringClear (name); + while (isIdentifierCharacter ((int) *start)) + { + vStringPut (name, (int) *start); + ++start; + } + vStringTerminate (name); + + parent_is_class = constructParentString(nesting_levels, indent, parent); + /* skip variables in methods */ + if (! parent_is_class && vStringLength(parent) > 0) + continue; + + makeVariableTag (name, parent); + } + /* Find and parse imports */ + parseImports(line); + } + /* Clean up all memory we allocated. 
*/ + vStringDelete (parent); + vStringDelete (name); + vStringDelete (continuation); + nestingLevelsFree (nesting_levels); +} + +extern parserDefinition *PythonParser (void) +{ + static const char *const extensions[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL }; + parserDefinition *def = parserNew ("Python"); + def->kinds = PythonKinds; + def->kindCount = KIND_COUNT (PythonKinds); + def->extensions = extensions; + def->parser = findPythonTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/qdos.c b/qdos.c new file mode 100644 index 0000000..2adb8c3 --- /dev/null +++ b/qdos.c @@ -0,0 +1,106 @@ +/* +* $Id: qdos.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1999, Thierry Godefroy +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions to handle wildcard expansion and file name +* conversion under QDOS. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "ctags.h" + +/* Translate the filenames from UNIX to QDOS conventions on open calls */ +int (*_Open) (const char *, int, ...) 
= qopen; + +long _stack = 24576; /* Plenty of stack space */ +long _memincr = 10240; /* Big increments to cut fragmentation */ +char _prog_name [] = "ctags"; +char _version [] = PROGRAM_VERSION; +char _copyright [32] = __DATE__; +char *_endmsg = "\nPress a key to exit."; +int custom_expand (char * param, char ***argvptr, int *argcptr); +int (*_cmdwildcard) () = custom_expand; + + +struct WINDOWDEF _condetails = { 208, 1, 0, 7, 512, 256, 0, 0}; +void (*_consetup) () = consetup_title; + +/* custom cmdexpand: also expands directory names */ + +#define FILEBUF_INIT 1024 /* Initial allocation size for buffer */ +#define FILEBUF_INCR 1024 /* Increment size for buffer */ + +int custom_expand (char * param, char ***argvptr, int *argcptr) +{ + int count,sl; + size_t bufsize; + char *filenamebuf; + char *ptr,*safeptr; + + /* + * Check to see if we should do wild card expansion. + * We only perform wildcard expansion if the parameter + * was not a string and if it contains one of the + * wild card characters. + * + * We also do not expand any option that starts with '-' + * as we then assume that it is a unix stylew option. + */ + if ((*param == '-') || (strpbrk (param,"*?") == NULL) ) { + return 0; + } + + if ((filenamebuf = malloc (bufsize = FILEBUF_INIT)) == NULL) { + return -1; + } +TRYAGAIN: + count = getfnl (param, filenamebuf, bufsize, QDR_ALL); + if (count == -1 && errno == ENOMEM) { + /* + * We have overflowed the buffer, so we try + * to get a bigger buffer and try again. + */ + bufsize += FILEBUF_INCR; + if ((filenamebuf = realloc (filenamebuf, bufsize)) == NULL) { + return -1; + } else { + goto TRYAGAIN; + } + } + /* + * If no files were found, then return unexpanded. + */ + if (count == 0) { + free (filenamebuf); + return 0; + } + /* + * Files were found, so add these to the list instead + * of the original parameter typed by the user. 
+ */ + for ( ptr=filenamebuf ; count > 0 ; count -- ) { + *argvptr = (char **) realloc (*argvptr, (size_t) (((*argcptr) + 2) * sizeof (char *))); + safeptr= (char *) malloc ((size_t) (sl=strlen (ptr) + 1)); + if (safeptr == NULL || *argvptr == NULL) { + return -1; + } + (void) memcpy (safeptr,ptr, (size_t) sl); + (*argvptr) [*argcptr] = safeptr; + *argcptr += 1; + ptr += sl; + } + free (filenamebuf); + return *argcptr; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/read.c b/read.c new file mode 100644 index 0000000..7940c86 --- /dev/null +++ b/read.c @@ -0,0 +1,564 @@ +/* +* $Id: read.c 708 2009-07-04 05:29:02Z dhiebert $ +* +* Copyright (c) 1996-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains low level source and tag file read functions (newline +* conversion for source files are performed at this level). +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include +#include + +#define FILE_WRITE +#include "read.h" +#include "debug.h" +#include "entry.h" +#include "main.h" +#include "routines.h" +#include "options.h" + +/* +* DATA DEFINITIONS +*/ +inputFile File; /* globally read through macros */ +static fpos_t StartOfLine; /* holds deferred position of start of line */ + +/* +* FUNCTION DEFINITIONS +*/ + +extern void freeSourceFileResources (void) +{ + if (File.name != NULL) + vStringDelete (File.name); + if (File.path != NULL) + vStringDelete (File.path); + if (File.source.name != NULL) + vStringDelete (File.source.name); + if (File.source.tagPath != NULL) + eFree (File.source.tagPath); + if (File.line != NULL) + vStringDelete (File.line); +} + +/* + * Source file access functions + */ + +static void setInputFileName (const char *const fileName) +{ + const char *const head = fileName; + const char *const tail = baseFilename (head); + + if (File.name != NULL) + vStringDelete (File.name); + File.name = 
vStringNewInit (fileName); + + if (File.path != NULL) + vStringDelete (File.path); + if (tail == head) + File.path = NULL; + else + { + const size_t length = tail - head - 1; + File.path = vStringNew (); + vStringNCopyS (File.path, fileName, length); + } +} + +static void setSourceFileParameters (vString *const fileName) +{ + if (File.source.name != NULL) + vStringDelete (File.source.name); + File.source.name = fileName; + + if (File.source.tagPath != NULL) + eFree (File.source.tagPath); + if (! Option.tagRelative || isAbsolutePath (vStringValue (fileName))) + File.source.tagPath = eStrdup (vStringValue (fileName)); + else + File.source.tagPath = + relativeFilename (vStringValue (fileName), TagFile.directory); + + if (vStringLength (fileName) > TagFile.max.file) + TagFile.max.file = vStringLength (fileName); + + File.source.isHeader = isIncludeFile (vStringValue (fileName)); + File.source.language = getFileLanguage (vStringValue (fileName)); +} + +static boolean setSourceFileName (vString *const fileName) +{ + boolean result = FALSE; + if (getFileLanguage (vStringValue (fileName)) != LANG_IGNORE) + { + vString *pathName; + if (isAbsolutePath (vStringValue (fileName)) || File.path == NULL) + pathName = vStringNewCopy (fileName); + else + pathName = combinePathAndFile ( + vStringValue (File.path), vStringValue (fileName)); + setSourceFileParameters (pathName); + result = TRUE; + } + return result; +} + +/* + * Line directive parsing + */ + +static int skipWhite (void) +{ + int c; + do + c = getc (File.fp); + while (c == ' ' || c == '\t'); + return c; +} + +static unsigned long readLineNumber (void) +{ + unsigned long lNum = 0; + int c = skipWhite (); + while (c != EOF && isdigit (c)) + { + lNum = (lNum * 10) + (c - '0'); + c = getc (File.fp); + } + ungetc (c, File.fp); + if (c != ' ' && c != '\t') + lNum = 0; + + return lNum; +} + +/* While ANSI only permits lines of the form: + * # line n "filename" + * Earlier compilers generated lines of the form + * # n filename 
+ * GNU C will output lines of the form: + * # n "filename" + * So we need to be fairly flexible in what we accept. + */ +static vString *readFileName (void) +{ + vString *const fileName = vStringNew (); + boolean quoteDelimited = FALSE; + int c = skipWhite (); + + if (c == '"') + { + c = getc (File.fp); /* skip double-quote */ + quoteDelimited = TRUE; + } + while (c != EOF && c != '\n' && + (quoteDelimited ? (c != '"') : (c != ' ' && c != '\t'))) + { + vStringPut (fileName, c); + c = getc (File.fp); + } + if (c == '\n') + ungetc (c, File.fp); + vStringPut (fileName, '\0'); + + return fileName; +} + +static boolean parseLineDirective (void) +{ + boolean result = FALSE; + int c = skipWhite (); + DebugStatement ( const char* lineStr = ""; ) + + if (isdigit (c)) + { + ungetc (c, File.fp); + result = TRUE; + } + else if (c == 'l' && getc (File.fp) == 'i' && + getc (File.fp) == 'n' && getc (File.fp) == 'e') + { + c = getc (File.fp); + if (c == ' ' || c == '\t') + { + DebugStatement ( lineStr = "line"; ) + result = TRUE; + } + } + if (result) + { + const unsigned long lNum = readLineNumber (); + if (lNum == 0) + result = FALSE; + else + { + vString *const fileName = readFileName (); + if (vStringLength (fileName) == 0) + { + File.source.lineNumber = lNum - 1; /* applies to NEXT line */ + DebugStatement ( debugPrintf (DEBUG_RAW, "#%s %ld", lineStr, lNum); ) + } + else if (setSourceFileName (fileName)) + { + File.source.lineNumber = lNum - 1; /* applies to NEXT line */ + DebugStatement ( debugPrintf (DEBUG_RAW, "#%s %ld \"%s\"", + lineStr, lNum, vStringValue (fileName)); ) + } + + if (Option.include.fileNames && vStringLength (fileName) > 0 && + lNum == 1) + { + tagEntryInfo tag; + initTagEntry (&tag, baseFilename (vStringValue (fileName))); + + tag.isFileEntry = TRUE; + tag.lineNumberEntry = TRUE; + tag.lineNumber = 1; + tag.kindName = "file"; + tag.kind = 'F'; + + makeTagEntry (&tag); + } + vStringDelete (fileName); + result = TRUE; + } + } + return result; +} + +/* + * 
Source file I/O operations + */ + +/* This function opens a source file, and resets the line counter. If it + * fails, it will display an error message and leave the File.fp set to NULL. + */ +extern boolean fileOpen (const char *const fileName, const langType language) +{ +#ifdef VMS + const char *const openMode = "r"; +#else + const char *const openMode = "rb"; +#endif + boolean opened = FALSE; + + /* If another file was already open, then close it. + */ + if (File.fp != NULL) + { + fclose (File.fp); /* close any open source file */ + File.fp = NULL; + } + + File.fp = fopen (fileName, openMode); + if (File.fp == NULL) + error (WARNING | PERROR, "cannot open \"%s\"", fileName); + else + { + opened = TRUE; + + setInputFileName (fileName); + fgetpos (File.fp, &StartOfLine); + fgetpos (File.fp, &File.filePosition); + File.currentLine = NULL; + File.language = language; + File.lineNumber = 0L; + File.eof = FALSE; + File.newLine = TRUE; + + if (File.line != NULL) + vStringClear (File.line); + + setSourceFileParameters (vStringNewInit (fileName)); + File.source.lineNumber = 0L; + + verbose ("OPENING %s as %s language %sfile\n", fileName, + getLanguageName (language), + File.source.isHeader ? "include " : ""); + } + return opened; +} + +extern void fileClose (void) +{ + if (File.fp != NULL) + { + /* The line count of the file is 1 too big, since it is one-based + * and is incremented upon each newline. + */ + if (Option.printTotals) + { + fileStatus *status = eStat (vStringValue (File.name)); + addTotals (0, File.lineNumber - 1L, status->size); + } + fclose (File.fp); + File.fp = NULL; + } +} + +extern boolean fileEOF (void) +{ + return File.eof; +} + +/* Action to take for each encountered source newline. 
+ */ +static void fileNewline (void) +{ + File.filePosition = StartOfLine; + File.newLine = FALSE; + File.lineNumber++; + File.source.lineNumber++; + DebugStatement ( if (Option.breakLine == File.lineNumber) lineBreak (); ) + DebugStatement ( debugPrintf (DEBUG_RAW, "%6ld: ", File.lineNumber); ) +} + +/* This function reads a single character from the stream, performing newline + * canonicalization. + */ +static int iFileGetc (void) +{ + int c; +readnext: + c = getc (File.fp); + + /* If previous character was a newline, then we're starting a line. + */ + if (File.newLine && c != EOF) + { + fileNewline (); + if (c == '#' && Option.lineDirectives) + { + if (parseLineDirective ()) + goto readnext; + else + { + fsetpos (File.fp, &StartOfLine); + c = getc (File.fp); + } + } + } + + if (c == EOF) + File.eof = TRUE; + else if (c == NEWLINE) + { + File.newLine = TRUE; + fgetpos (File.fp, &StartOfLine); + } + else if (c == CRETURN) + { + /* Turn line breaks into a canonical form. The three commonly + * used forms if line breaks: LF (UNIX/Mac OS X), CR (Mac OS 9), + * and CR-LF (MS-DOS) are converted into a generic newline. + */ +#ifndef macintosh + const int next = getc (File.fp); /* is CR followed by LF? 
*/ + if (next != NEWLINE) + ungetc (next, File.fp); + else +#endif + { + c = NEWLINE; /* convert CR into newline */ + File.newLine = TRUE; + fgetpos (File.fp, &StartOfLine); + } + } + DebugStatement ( debugPutc (DEBUG_RAW, c); ) + return c; +} + +extern void fileUngetc (int c) +{ + File.ungetch = c; +} + +static vString *iFileGetLine (void) +{ + vString *result = NULL; + int c; + if (File.line == NULL) + File.line = vStringNew (); + vStringClear (File.line); + do + { + c = iFileGetc (); + if (c != EOF) + vStringPut (File.line, c); + if (c == '\n' || (c == EOF && vStringLength (File.line) > 0)) + { + vStringTerminate (File.line); +#ifdef HAVE_REGEX + if (vStringLength (File.line) > 0) + matchRegex (File.line, File.source.language); +#endif + result = File.line; + break; + } + } while (c != EOF); + Assert (result != NULL || File.eof); + return result; +} + +/* Do not mix use of fileReadLine () and fileGetc () for the same file. + */ +extern int fileGetc (void) +{ + int c; + + /* If there is an ungotten character, then return it. Don't do any + * other processing on it, though, because we already did that the + * first time it was read through fileGetc (). + */ + if (File.ungetch != '\0') + { + c = File.ungetch; + File.ungetch = '\0'; + return c; /* return here to avoid re-calling debugPutc () */ + } + do + { + if (File.currentLine != NULL) + { + c = *File.currentLine++; + if (c == '\0') + File.currentLine = NULL; + } + else + { + vString* const line = iFileGetLine (); + if (line != NULL) + File.currentLine = (unsigned char*) vStringValue (line); + if (File.currentLine == NULL) + c = EOF; + else + c = '\0'; + } + } while (c == '\0'); + DebugStatement ( debugPutc (DEBUG_READ, c); ) + return c; +} + +extern int fileSkipToCharacter (int c) +{ + int d; + do + { + d = fileGetc (); + } while (d != EOF && d != c); + return d; +} + +/* An alternative interface to fileGetc (). Do not mix use of fileReadLine() + * and fileGetc() for the same file. 
The returned string does not contain + * the terminating newline. A NULL return value means that all lines in the + * file have been read and we are at the end of file. + */ +extern const unsigned char *fileReadLine (void) +{ + vString* const line = iFileGetLine (); + const unsigned char* result = NULL; + if (line != NULL) + { + result = (const unsigned char*) vStringValue (line); + vStringStripNewline (line); + DebugStatement ( debugPrintf (DEBUG_READ, "%s\n", result); ) + } + return result; +} + +/* + * Source file line reading with automatic buffer sizing + */ +extern char *readLine (vString *const vLine, FILE *const fp) +{ + char *result = NULL; + + vStringClear (vLine); + if (fp == NULL) /* to free memory allocated to buffer */ + error (FATAL, "NULL file pointer"); + else + { + boolean reReadLine; + + /* If reading the line places any character other than a null or a + * newline at the last character position in the buffer (one less + * than the buffer size), then we must resize the buffer and + * reattempt to read the line. + */ + do + { + char *const pLastChar = vStringValue (vLine) + vStringSize (vLine) -2; + fpos_t startOfLine; + + fgetpos (fp, &startOfLine); + reReadLine = FALSE; + *pLastChar = '\0'; + result = fgets (vStringValue (vLine), (int) vStringSize (vLine), fp); + if (result == NULL) + { + if (! 
feof (fp)) + error (FATAL | PERROR, "Failure on attempt to read file"); + } + else if (*pLastChar != '\0' && + *pLastChar != '\n' && *pLastChar != '\r') + { + /* buffer overflow */ + reReadLine = vStringAutoResize (vLine); + if (reReadLine) + fsetpos (fp, &startOfLine); + else + error (FATAL | PERROR, "input line too big; out of memory"); + } + else + { + char* eol; + vStringSetLength (vLine); + /* canonicalize new line */ + eol = vStringValue (vLine) + vStringLength (vLine) - 1; + if (*eol == '\r') + *eol = '\n'; + else if (*(eol - 1) == '\r' && *eol == '\n') + { + *(eol - 1) = '\n'; + *eol = '\0'; + --vLine->length; + } + } + } while (reReadLine); + } + return result; +} + +/* Places into the line buffer the contents of the line referenced by + * "location". + */ +extern char *readSourceLine ( + vString *const vLine, fpos_t location, long *const pSeekValue) +{ + fpos_t orignalPosition; + char *result; + + fgetpos (File.fp, &orignalPosition); + fsetpos (File.fp, &location); + if (pSeekValue != NULL) + *pSeekValue = ftell (File.fp); + result = readLine (vLine, File.fp); + if (result == NULL) + error (FATAL, "Unexpected end of file: %s", vStringValue (File.name)); + fsetpos (File.fp, &orignalPosition); + + return result; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/read.h b/read.h new file mode 100644 index 0000000..ad49a82 --- /dev/null +++ b/read.h @@ -0,0 +1,116 @@ +/* +* $Id: read.h 659 2008-04-20 23:27:48Z elliotth $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. 
+* +* External interface to read.c +*/ +#ifndef _READ_H +#define _READ_H + +#if defined(FILE_WRITE) || defined(VAXC) +# define CONST_FILE +#else +# define CONST_FILE const +#endif + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include +#include + +#include "parse.h" +#include "vstring.h" + +/* +* MACROS +*/ +#define getInputLineNumber() File.lineNumber +#define getInputFileName() vStringValue (File.source.name) +#define getInputFilePosition() File.filePosition +#define getSourceFileName() vStringValue (File.source.name) +#define getSourceFileTagPath() File.source.tagPath +#define getSourceLanguage() File.source.language +#define getSourceLanguageName() getLanguageName (File.source.language) +#define getSourceLineNumber() File.source.lineNumber +#define isLanguage(lang) (boolean)((lang) == File.source.language) +#define isHeaderFile() File.source.isHeader + +/* +* DATA DECLARATIONS +*/ + +enum eCharacters { + /* white space characters */ + SPACE = ' ', + NEWLINE = '\n', + CRETURN = '\r', + FORMFEED = '\f', + TAB = '\t', + VTAB = '\v', + + /* some hard to read characters */ + DOUBLE_QUOTE = '"', + SINGLE_QUOTE = '\'', + BACKSLASH = '\\', + + STRING_SYMBOL = ('S' + 0x80), + CHAR_SYMBOL = ('C' + 0x80) +}; + +/* Maintains the state of the current source file. + */ +typedef struct sInputFile { + vString *name; /* name of input file */ + vString *path; /* path of input file (if any) */ + vString *line; /* last line read from file */ + const unsigned char* currentLine; /* current line being worked on */ + FILE *fp; /* stream used for reading the file */ + unsigned long lineNumber; /* line number in the input file */ + fpos_t filePosition; /* file position of current line */ + int ungetch; /* a single character that was ungotten */ + boolean eof; /* have we reached the end of file? */ + boolean newLine; /* will the next character begin a new line? 
*/ + langType language; /* language of input file */ + + /* Contains data pertaining to the original source file in which the tag + * was defined. This may be different from the input file when #line + * directives are processed (i.e. the input file is preprocessor output). + */ + struct sSource { + vString *name; /* name to report for source file */ + char *tagPath; /* path of source file relative to tag file */ + unsigned long lineNumber;/* line number in the source file */ + boolean isHeader; /* is source file a header file? */ + langType language; /* language of source file */ + } source; +} inputFile; + +/* +* GLOBAL VARIABLES +*/ +extern CONST_FILE inputFile File; + +/* +* FUNCTION PROTOTYPES +*/ +extern void freeSourceFileResources (void); +extern boolean fileOpen (const char *const fileName, const langType language); +extern boolean fileEOF (void); +extern void fileClose (void); +extern int fileGetc (void); +extern int fileSkipToCharacter (int c); +extern void fileUngetc (int c); +extern const unsigned char *fileReadLine (void); +extern char *readLine (vString *const vLine, FILE *const fp); +extern char *readSourceLine (vString *const vLine, fpos_t location, long *const pSeekValue); + +#endif /* _READ_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/readtags.c b/readtags.c new file mode 100644 index 0000000..86442d1 --- /dev/null +++ b/readtags.c @@ -0,0 +1,959 @@ +/* +* $Id: readtags.c 592 2007-07-31 03:30:41Z dhiebert $ +* +* Copyright (c) 1996-2003, Darren Hiebert +* +* This source code is released into the public domain. +* +* This module contains functions for reading tag files. 
+*/ + +/* +* INCLUDE FILES +*/ +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> /* to declare off_t */ + +#include "readtags.h" + +/* +* MACROS +*/ +#define TAB '\t' + + +/* +* DATA DECLARATIONS +*/ +typedef struct { + size_t size; + char *buffer; +} vstring; + +/* Information about current tag file */ +struct sTagFile { + /* has the file been opened and this structure initialized? */ + short initialized; + /* format of tag file */ + short format; + /* how is the tag file sorted? */ + sortType sortMethod; + /* pointer to file structure */ + FILE* fp; + /* file position of first character of `line' */ + off_t pos; + /* size of tag file in seekable positions */ + off_t size; + /* last line read */ + vstring line; + /* name of tag in last line read */ + vstring name; + /* defines tag search state */ + struct { + /* file position of last match for tag */ + off_t pos; + /* name of tag last searched for */ + char *name; + /* length of name for partial matches */ + size_t nameLength; + /* performing partial match */ + short partial; + /* ignoring case */ + short ignorecase; + } search; + /* miscellaneous extension fields */ + struct { + /* number of entries in `list' */ + unsigned short max; + /* list of key value pairs */ + tagExtensionField *list; + } fields; + /* buffers to be freed at close */ + struct { + /* name of program author */ + char *author; + /* name of program */ + char *name; + /* URL of distribution */ + char *url; + /* program version */ + char *version; + } program; +}; + +/* +* DATA DEFINITIONS +*/ +const char *const EmptyString = ""; +const char *const PseudoTagPrefix = "!_"; + +/* +* FUNCTION DEFINITIONS +*/ + +/* + * Compare two strings, ignoring case. 
+ * Return 0 for match, < 0 for smaller, > 0 for bigger + * Make sure case is folded to uppercase in comparison (like for 'sort -f') + * This makes a difference when one of the chars lies between upper and lower + * ie. one of the chars [ \ ] ^ _ ` for ascii. (The '_' in particular !)
+ */ +static int struppercmp (const char *s1, const char *s2) +{ + int result; + do + { + result = toupper ((int) *s1) - toupper ((int) *s2); + } while (result == 0 && *s1++ != '\0' && *s2++ != '\0'); + return result; +} + +static int strnuppercmp (const char *s1, const char *s2, size_t n) +{ + int result; + do + { + result = toupper ((int) *s1) - toupper ((int) *s2); + } while (result == 0 && --n > 0 && *s1++ != '\0' && *s2++ != '\0'); + return result; +} + +static int growString (vstring *s) +{ + int result = 0; + size_t newLength; + char *newLine; + if (s->size == 0) + { + newLength = 128; + newLine = (char*) malloc (newLength); + *newLine = '\0'; + } + else + { + newLength = 2 * s->size; + newLine = (char*) realloc (s->buffer, newLength); + } + if (newLine == NULL) + perror ("string too large"); + else + { + s->buffer = newLine; + s->size = newLength; + result = 1; + } + return result; +} + +/* Copy name of tag out of tag line */ +static void copyName (tagFile *const file) +{ + size_t length; + const char *end = strchr (file->line.buffer, '\t'); + if (end == NULL) + { + end = strchr (file->line.buffer, '\n'); + if (end == NULL) + end = strchr (file->line.buffer, '\r'); + } + if (end != NULL) + length = end - file->line.buffer; + else + length = strlen (file->line.buffer); + while (length >= file->name.size) + growString (&file->name); + strncpy (file->name.buffer, file->line.buffer, length); + file->name.buffer [length] = '\0'; +} + +static int readTagLineRaw (tagFile *const file) +{ + int result = 1; + int reReadLine; + + /* If reading the line places any character other than a null or a + * newline at the last character position in the buffer (one less than + * the buffer size), then we must resize the buffer and reattempt to read + * the line. 
+ */ + do + { + char *const pLastChar = file->line.buffer + file->line.size - 2; + char *line; + + file->pos = ftell (file->fp); + reReadLine = 0; + *pLastChar = '\0'; + line = fgets (file->line.buffer, (int) file->line.size, file->fp); + if (line == NULL) + { + /* read error */ + if (! feof (file->fp)) + perror ("readTagLine"); + result = 0; + } + else if (*pLastChar != '\0' && + *pLastChar != '\n' && *pLastChar != '\r') + { + /* buffer overflow */ + growString (&file->line); + fseek (file->fp, file->pos, SEEK_SET); + reReadLine = 1; + } + else + { + size_t i = strlen (file->line.buffer); + while (i > 0 && + (file->line.buffer [i - 1] == '\n' || file->line.buffer [i - 1] == '\r')) + { + file->line.buffer [i - 1] = '\0'; + --i; + } + } + } while (reReadLine && result); + if (result) + copyName (file); + return result; +} + +static int readTagLine (tagFile *const file) +{ + int result; + do + { + result = readTagLineRaw (file); + } while (result && *file->name.buffer == '\0'); + return result; +} + +static tagResult growFields (tagFile *const file) +{ + tagResult result = TagFailure; + unsigned short newCount = (unsigned short) 2 * file->fields.max; + tagExtensionField *newFields = (tagExtensionField*) + realloc (file->fields.list, newCount * sizeof (tagExtensionField)); + if (newFields == NULL) + perror ("too many extension fields"); + else + { + file->fields.list = newFields; + file->fields.max = newCount; + result = TagSuccess; + } + return result; +} + +static void parseExtensionFields (tagFile *const file, tagEntry *const entry, + char *const string) +{ + char *p = string; + while (p != NULL && *p != '\0') + { + while (*p == TAB) + *p++ = '\0'; + if (*p != '\0') + { + char *colon; + char *field = p; + p = strchr (p, TAB); + if (p != NULL) + *p++ = '\0'; + colon = strchr (field, ':'); + if (colon == NULL) + entry->kind = field; + else + { + const char *key = field; + const char *value = colon + 1; + *colon = '\0'; + if (strcmp (key, "kind") == 0) + entry->kind = 
value; + else if (strcmp (key, "file") == 0) + entry->fileScope = 1; + else if (strcmp (key, "line") == 0) + entry->address.lineNumber = atol (value); + else + { + /* store the field only when there is room for it: if the + * table is full and cannot grow, the field is dropped + * instead of being written past the end of the list */ + if (entry->fields.count < file->fields.max || + growFields (file) == TagSuccess) + { + file->fields.list [entry->fields.count].key = key; + file->fields.list [entry->fields.count].value = value; + ++entry->fields.count; + } + } + } + } + } +} + +/* Split the tag line held in file->line.buffer in place and point the members + * of entry at its pieces: name, file, address and any extension fields. + * Members for which the line has no text are left NULL or 0. + */ +static void parseTagLine (tagFile *file, tagEntry *const entry) +{ + int i; + char *p = file->line.buffer; + char *tab = strchr (p, TAB); + + entry->fields.list = NULL; + entry->fields.count = 0; + entry->kind = NULL; + entry->fileScope = 0; + /* a line may stop before these parts; do not leave them indeterminate */ + entry->file = NULL; + entry->address.pattern = NULL; + entry->address.lineNumber = 0; + + entry->name = p; + if (tab != NULL) + { + *tab = '\0'; + p = tab + 1; + entry->file = p; + tab = strchr (p, TAB); + if (tab != NULL) + { + int fieldsPresent; + *tab = '\0'; + p = tab + 1; + if (*p == '/' || *p == '?') + { + /* parse pattern */ + int delimiter = *(unsigned char*) p; + entry->address.lineNumber = 0; + entry->address.pattern = p; + do + { + p = strchr (p + 1, delimiter); + } while (p != NULL && *(p - 1) == '\\'); + if (p == NULL) + { + /* invalid pattern: no closing delimiter on the line */ + } + else + ++p; + } + else if (isdigit ((int) *(unsigned char*) p)) + { + /* parse line number */ + entry->address.pattern = p; + entry->address.lineNumber = atol (p); + while (isdigit ((int) *(unsigned char*) p)) + ++p; + } + else + { + /* invalid pattern */ + } + 
/* p is NULL after an unterminated pattern; there is then nothing + * left on the line to terminate or to scan for fields */ + if (p != NULL) + { + fieldsPresent = (strncmp (p, ";\"", 2) == 0); + *p = '\0'; + if (fieldsPresent) + parseExtensionFields (file, entry, p + 2); + } + } + } + if (entry->fields.count > 0) + entry->fields.list = file->fields.list; + for (i = entry->fields.count ; i < file->fields.max ; ++i) + { + file->fields.list [i].key = NULL; + file->fields.list [i].value = NULL; + } +} + +/* Return a malloc'ed copy of str, or NULL if str is NULL or memory runs out + * (the latter is reported with perror()). + */ +static char *duplicate (const char *str) +{ + char *result = NULL; + if (str != NULL) + { + result = strdup (str); + if (result == NULL) + perror (NULL); + } + return result; +} + +static void readPseudoTags (tagFile *const file, tagFileInfo *const info) +{ + fpos_t 
startOfLine; + const size_t prefixLength = strlen (PseudoTagPrefix); + if (info != NULL) + { + info->file.format = 1; + info->file.sort = TAG_UNSORTED; + info->program.author = NULL; + info->program.name = NULL; + info->program.url = NULL; + info->program.version = NULL; + } + while (1) + { + fgetpos (file->fp, &startOfLine); + if (! readTagLine (file)) + break; + if (strncmp (file->line.buffer, PseudoTagPrefix, prefixLength) != 0) + break; + else + { + tagEntry entry; + const char *key, *value; + parseTagLine (file, &entry); + key = entry.name + prefixLength; + value = entry.file; + if (strcmp (key, "TAG_FILE_SORTED") == 0) + file->sortMethod = (sortType) atoi (value); + else if (strcmp (key, "TAG_FILE_FORMAT") == 0) + file->format = (short) atoi (value); + else if (strcmp (key, "TAG_PROGRAM_AUTHOR") == 0) + file->program.author = duplicate (value); + else if (strcmp (key, "TAG_PROGRAM_NAME") == 0) + file->program.name = duplicate (value); + else if (strcmp (key, "TAG_PROGRAM_URL") == 0) + file->program.url = duplicate (value); + else if (strcmp (key, "TAG_PROGRAM_VERSION") == 0) + file->program.version = duplicate (value); + if (info != NULL) + { + info->file.format = file->format; + info->file.sort = file->sortMethod; + info->program.author = file->program.author; + info->program.name = file->program.name; + info->program.url = file->program.url; + info->program.version = file->program.version; + } + } + } + fsetpos (file->fp, &startOfLine); +} + +static void gotoFirstLogicalTag (tagFile *const file) +{ + fpos_t startOfLine; + const size_t prefixLength = strlen (PseudoTagPrefix); + rewind (file->fp); + while (1) + { + fgetpos (file->fp, &startOfLine); + if (! 
readTagLine (file)) + break; + if (strncmp (file->line.buffer, PseudoTagPrefix, prefixLength) != 0) + break; + } + fsetpos (file->fp, &startOfLine); +} + +/* Allocate a tagFile, open filePath for reading and read its pseudo-tags. + * Returns the new tagFile, or NULL on failure. "info" may be NULL; when it + * is not, status.opened is set on success and status.error_number receives + * the reason for a failure. + */ +static tagFile *initialize (const char *const filePath, tagFileInfo *const info) +{ + tagFile *result = (tagFile*) calloc ((size_t) 1, sizeof (tagFile)); + if (result == NULL) + { + if (info != NULL) + info->status.error_number = ENOMEM; + } + else + { + int failure = ENOMEM; /* reason reported if the setup below fails */ + growString (&result->line); + growString (&result->name); + result->fields.max = 20; + result->fields.list = (tagExtensionField*) calloc ( + result->fields.max, sizeof (tagExtensionField)); + /* only open the file once every buffer the readers rely on exists */ + if (result->line.buffer != NULL && result->name.buffer != NULL && + result->fields.list != NULL) + { + result->fp = fopen (filePath, "r"); + if (result->fp == NULL) + failure = errno; /* saved before free() can disturb it */ + } + if (result->fp == NULL) + { + free (result->line.buffer); + free (result->name.buffer); + free (result->fields.list); + free (result); + result = NULL; + if (info != NULL) + info->status.error_number = failure; + } + else + { + fseek (result->fp, 0, SEEK_END); + result->size = ftell (result->fp); + rewind (result->fp); + readPseudoTags (result, info); + if (info != NULL) + info->status.opened = 1; + result->initialized = 1; + } + } + return result; +} + +/* Close the tag file and release every buffer owned by "file", including + * the structure itself. 
+ */ +static void terminate (tagFile *const file) +{ + fclose (file->fp); + + free (file->line.buffer); + free (file->name.buffer); + free (file->fields.list); + + if (file->program.author != NULL) + free (file->program.author); + if (file->program.name != NULL) + free (file->program.name); + if (file->program.url != NULL) + free (file->program.url); + if (file->program.version != NULL) + free (file->program.version); + if (file->search.name != NULL) + free (file->search.name); + + memset (file, 0, sizeof (tagFile)); + + free (file); +} + +static tagResult readNext (tagFile *const file, tagEntry *const entry) +{ + tagResult result; + if (file == NULL || ! file->initialized) + result = TagFailure; + else if (! 
readTagLine (file)) + result = TagFailure; + else + { + if (entry != NULL) + parseTagLine (file, entry); + result = TagSuccess; + } + return result; +} + +static const char *readFieldValue ( + const tagEntry *const entry, const char *const key) +{ + const char *result = NULL; + int i; + if (strcmp (key, "kind") == 0) + result = entry->kind; + else if (strcmp (key, "file") == 0) + result = EmptyString; + else for (i = 0 ; i < entry->fields.count && result == NULL ; ++i) + if (strcmp (entry->fields.list [i].key, key) == 0) + result = entry->fields.list [i].value; + return result; +} + +static int readTagLineSeek (tagFile *const file, const off_t pos) +{ + int result = 0; + if (fseek (file->fp, pos, SEEK_SET) == 0) + { + result = readTagLine (file); /* read probable partial line */ + if (pos > 0 && result) + result = readTagLine (file); /* read complete line */ + } + return result; +} + +static int nameComparison (tagFile *const file) +{ + int result; + if (file->search.ignorecase) + { + if (file->search.partial) + result = strnuppercmp (file->search.name, file->name.buffer, + file->search.nameLength); + else + result = struppercmp (file->search.name, file->name.buffer); + } + else + { + if (file->search.partial) + result = strncmp (file->search.name, file->name.buffer, + file->search.nameLength); + else + result = strcmp (file->search.name, file->name.buffer); + } + return result; +} + +static void findFirstNonMatchBefore (tagFile *const file) +{ +#define JUMP_BACK 512 + int more_lines; + int comp; + off_t start = file->pos; + off_t pos = start; + do + { + if (pos < (off_t) JUMP_BACK) + pos = 0; + else + pos = pos - JUMP_BACK; + more_lines = readTagLineSeek (file, pos); + comp = nameComparison (file); + } while (more_lines && comp == 0 && pos > 0 && pos < start); +} + +static tagResult findFirstMatchBefore (tagFile *const file) +{ + tagResult result = TagFailure; + int more_lines; + off_t start = file->pos; + findFirstNonMatchBefore (file); + do + { + more_lines = 
readTagLine (file); + if (nameComparison (file) == 0) + result = TagSuccess; + } while (more_lines && result != TagSuccess && file->pos < start); + return result; +} + +static tagResult findBinary (tagFile *const file) +{ + tagResult result = TagFailure; + off_t lower_limit = 0; + off_t upper_limit = file->size; + off_t last_pos = 0; + off_t pos = upper_limit / 2; + while (result != TagSuccess) + { + if (! readTagLineSeek (file, pos)) + { + /* in case we fell off end of file */ + result = findFirstMatchBefore (file); + break; + } + else if (pos == last_pos) + { + /* prevent infinite loop if we backed up to beginning of file */ + break; + } + else + { + const int comp = nameComparison (file); + last_pos = pos; + if (comp < 0) + { + upper_limit = pos; + pos = lower_limit + ((upper_limit - lower_limit) / 2); + } + else if (comp > 0) + { + lower_limit = pos; + pos = lower_limit + ((upper_limit - lower_limit) / 2); + } + else if (pos == 0) + result = TagSuccess; + else + result = findFirstMatchBefore (file); + } + } + return result; +} + +static tagResult findSequential (tagFile *const file) +{ + tagResult result = TagFailure; + if (file->initialized) + { + while (result == TagFailure && readTagLine (file)) + { + if (nameComparison (file) == 0) + result = TagSuccess; + } + } + return result; +} + +static tagResult find (tagFile *const file, tagEntry *const entry, + const char *const name, const int options) +{ + tagResult result; + if (file->search.name != NULL) + free (file->search.name); + file->search.name = duplicate (name); + file->search.nameLength = strlen (name); + file->search.partial = (options & TAG_PARTIALMATCH) != 0; + file->search.ignorecase = (options & TAG_IGNORECASE) != 0; + fseek (file->fp, 0, SEEK_END); + file->size = ftell (file->fp); + rewind (file->fp); + if ((file->sortMethod == TAG_SORTED && !file->search.ignorecase) || + (file->sortMethod == TAG_FOLDSORTED && file->search.ignorecase)) + { +#ifdef DEBUG + printf ("\n"); +#endif + result = 
findBinary (file); + } + else + { +#ifdef DEBUG + printf ("\n"); +#endif + result = findSequential (file); + } + + if (result != TagSuccess) + file->search.pos = file->size; + else + { + file->search.pos = file->pos; + if (entry != NULL) + parseTagLine (file, entry); + } + return result; +} + +static tagResult findNext (tagFile *const file, tagEntry *const entry) +{ + tagResult result; + if ((file->sortMethod == TAG_SORTED && !file->search.ignorecase) || + (file->sortMethod == TAG_FOLDSORTED && file->search.ignorecase)) + { + result = tagsNext (file, entry); + if (result == TagSuccess && nameComparison (file) != 0) + result = TagFailure; + } + else + { + result = findSequential (file); + if (result == TagSuccess && entry != NULL) + parseTagLine (file, entry); + } + return result; +} + +/* +* EXTERNAL INTERFACE +*/ + +extern tagFile *tagsOpen (const char *const filePath, tagFileInfo *const info) +{ + return initialize (filePath, info); +} + +extern tagResult tagsSetSortType (tagFile *const file, const sortType type) +{ + tagResult result = TagFailure; + if (file != NULL && file->initialized) + { + file->sortMethod = type; + result = TagSuccess; + } + return result; +} + +extern tagResult tagsFirst (tagFile *const file, tagEntry *const entry) +{ + tagResult result = TagFailure; + if (file != NULL && file->initialized) + { + gotoFirstLogicalTag (file); + result = readNext (file, entry); + } + return result; +} + +extern tagResult tagsNext (tagFile *const file, tagEntry *const entry) +{ + tagResult result = TagFailure; + if (file != NULL && file->initialized) + result = readNext (file, entry); + return result; +} + +extern const char *tagsField (const tagEntry *const entry, const char *const key) +{ + const char *result = NULL; + if (entry != NULL) + result = readFieldValue (entry, key); + return result; +} + +extern tagResult tagsFind (tagFile *const file, tagEntry *const entry, + const char *const name, const int options) +{ + tagResult result = TagFailure; + if 
(file != NULL && file->initialized) + result = find (file, entry, name, options); + return result; +} + +extern tagResult tagsFindNext (tagFile *const file, tagEntry *const entry) +{ + tagResult result = TagFailure; + if (file != NULL && file->initialized) + result = findNext (file, entry); + return result; +} + +extern tagResult tagsClose (tagFile *const file) +{ + tagResult result = TagFailure; + if (file != NULL && file->initialized) + { + terminate (file); + result = TagSuccess; + } + return result; +} + +/* +* TEST FRAMEWORK +*/ + +#ifdef READTAGS_MAIN + +static const char *TagFileName = "tags"; +static const char *ProgramName; +static int extensionFields; +static int SortOverride; +static sortType SortMethod; + +static void printTag (const tagEntry *entry) +{ + int i; + int first = 1; + const char* separator = ";\""; + const char* const empty = ""; +/* "sep" returns a value only the first time it is evaluated */ +#define sep (first ? (first = 0, separator) : empty) + printf ("%s\t%s\t%s", + entry->name, entry->file, entry->address.pattern); + if (extensionFields) + { + if (entry->kind != NULL && entry->kind [0] != '\0') + printf ("%s\tkind:%s", sep, entry->kind); + if (entry->fileScope) + printf ("%s\tfile:", sep); +#if 0 + if (entry->address.lineNumber > 0) + printf ("%s\tline:%lu", sep, entry->address.lineNumber); +#endif + for (i = 0 ; i < entry->fields.count ; ++i) + printf ("%s\t%s:%s", sep, entry->fields.list [i].key, + entry->fields.list [i].value); + } + putchar ('\n'); +#undef sep +} + +static void findTag (const char *const name, const int options) +{ + tagFileInfo info; + tagEntry entry; + tagFile *const file = tagsOpen (TagFileName, &info); + if (file == NULL) + { + fprintf (stderr, "%s: cannot open tag file: %s: %s\n", + ProgramName, strerror (info.status.error_number), name); + exit (1); + } + else + { + if (SortOverride) + tagsSetSortType (file, SortMethod); + if (tagsFind (file, &entry, name, options) == TagSuccess) + { + do + { + printTag 
(&entry); + } while (tagsFindNext (file, &entry) == TagSuccess); + } + tagsClose (file); + } +} + +static void listTags (void) +{ + tagFileInfo info; + tagEntry entry; + tagFile *const file = tagsOpen (TagFileName, &info); + if (file == NULL) + { + fprintf (stderr, "%s: cannot open tag file: %s: %s\n", + ProgramName, strerror (info.status.error_number), TagFileName); + exit (1); + } + else + { + while (tagsNext (file, &entry) == TagSuccess) + printTag (&entry); + tagsClose (file); + } +} + +const char *const Usage = + "Find tag file entries matching specified names.\n\n" + "Usage: %s [-ilp] [-s[0|1]] [-t file] [name(s)]\n\n" + "Options:\n" + " -e Include extension fields in output.\n" + " -i Perform case-insensitive matching.\n" + " -l List all tags.\n" + " -p Perform partial matching.\n" + " -s[0|1|2] Override sort detection of tag file.\n" + " -t file Use specified tag file (default: \"tags\").\n" + "Note that options are acted upon as encountered, so order is significant.\n"; + +extern int main (int argc, char **argv) +{ + int options = 0; + int actionSupplied = 0; + int i; + ProgramName = argv [0]; + if (argc == 1) + { + fprintf (stderr, Usage, ProgramName); + exit (1); + } + for (i = 1 ; i < argc ; ++i) + { + const char *const arg = argv [i]; + if (arg [0] != '-') + { + findTag (arg, options); + actionSupplied = 1; + } + else + { + size_t j; + for (j = 1 ; arg [j] != '\0' ; ++j) + { + switch (arg [j]) + { + case 'e': extensionFields = 1; break; + case 'i': options |= TAG_IGNORECASE; break; + case 'p': options |= TAG_PARTIALMATCH; break; + case 'l': listTags (); actionSupplied = 1; break; + + case 't': + if (arg [j+1] != '\0') + { + TagFileName = arg + j + 1; + j += strlen (TagFileName); + } + else if (i + 1 < argc) + TagFileName = argv [++i]; + else + { + fprintf (stderr, Usage, ProgramName); + exit (1); + } + break; + case 's': + SortOverride = 1; + ++j; + if (arg [j] == '\0') + SortMethod = TAG_SORTED; + else if (strchr ("012", arg[j]) != NULL) + SortMethod 
= (sortType) (arg[j] - '0'); + else + { + fprintf (stderr, Usage, ProgramName); + exit (1); + } + break; + default: + fprintf (stderr, "%s: unknown option: %c\n", + ProgramName, arg[j]); + exit (1); + break; + } + } + } + } + if (! actionSupplied) + { + fprintf (stderr, + "%s: no action specified: specify tag name(s) or -l option\n", + ProgramName); + exit (1); + } + return 0; +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/readtags.h b/readtags.h new file mode 100644 index 0000000..724f250 --- /dev/null +++ b/readtags.h @@ -0,0 +1,252 @@ +/* +* $Id: readtags.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1996-2003, Darren Hiebert +* +* This source code is released for the public domain. +* +* This file defines the public interface for looking up tag entries in tag +* files. +* +* The functions defined in this interface are intended to provide tag file +* support to a software tool. The tag lookups provided are sufficiently fast +* enough to permit opening a sorted tag file, searching for a matching tag, +* then closing the tag file each time a tag is looked up (search times are +* on the order of hundreths of a second, even for huge tag files). This is +* the recommended use of this library for most tool applications. Adhering +* to this approach permits a user to regenerate a tag file at will without +* the tool needing to detect and resynchronize with changes to the tag file. +* Even for an unsorted 24MB tag file, tag searches take about one second. 
+*/ +#ifndef READTAGS_H +#define READTAGS_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* +* MACROS +*/ + +/* Options for tagsSetSortType() */ +typedef enum { + TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED +} sortType ; + +/* Options for tagsFind() */ +#define TAG_FULLMATCH 0x0 +#define TAG_PARTIALMATCH 0x1 + +#define TAG_OBSERVECASE 0x0 +#define TAG_IGNORECASE 0x2 + +/* +* DATA DECLARATIONS +*/ + +typedef enum { TagFailure = 0, TagSuccess = 1 } tagResult; + +struct sTagFile; + +typedef struct sTagFile tagFile; + +/* This structure contains information about the tag file. */ +typedef struct { + + struct { + /* was the tag file successfully opened? */ + int opened; + + /* errno value when 'opened' is false */ + int error_number; + } status; + + /* information about the structure of the tag file */ + struct { + /* format of tag file (1 = original, 2 = extended) */ + short format; + + /* how is the tag file sorted? */ + sortType sort; + } file; + + + /* information about the program which created this tag file */ + struct { + /* name of author of generating program (may be null) */ + const char *author; + + /* name of program (may be null) */ + const char *name; + + /* URL of distribution (may be null) */ + const char *url; + + /* program version (may be null) */ + const char *version; + } program; + +} tagFileInfo; + +/* This structure contains information about an extension field for a tag. + * These exist at the end of the tag in the form "key:value". + */ +typedef struct { + + /* the key of the extension field */ + const char *key; + + /* the value of the extension field (may be an empty string) */ + const char *value; + +} tagExtensionField; + +/* This structure contains information about a specific tag. 
+ */ +typedef struct { + + /* name of tag */ + const char *name; + + /* path of source file containing definition of tag */ + const char *file; + + /* address for locating tag in source file */ + struct { + /* pattern for locating source line + * (may be NULL if not present) */ + const char *pattern; + + /* line number in source file of tag definition + * (may be zero if not known) */ + unsigned long lineNumber; + } address; + + /* kind of tag (may be a name, a character, or NULL if not known) */ + const char *kind; + + /* is tag of file-limited scope? */ + short fileScope; + + /* miscellaneous extension fields */ + struct { + /* number of entries in `list' */ + unsigned short count; + + /* list of key value pairs */ + tagExtensionField *list; + } fields; + +} tagEntry; + + +/* +* FUNCTION PROTOTYPES +*/ + +/* +* This function must be called before calling other functions in this +* library. It is passed the path to the tag file to read and a (possibly +* null) pointer to a structure which, if not null, will be populated with +* information about the tag file. If successful, the function will return a +* handle which must be supplied to other calls to read information from the +* tag file, and info.status.opened will be set to true. If unsuccessful, +* info.status.opened will be set to false and info.status.error_number will +* be set to the errno value representing the system error preventing the tag +* file from being successfully opened. +*/ +extern tagFile *tagsOpen (const char *const filePath, tagFileInfo *const info); + +/* +* This function allows the client to override the normal automatic detection +* of how a tag file is sorted. Permissible values for `type' are +* TAG_UNSORTED, TAG_SORTED, TAG_FOLDSORTED. Tag files in the new extended +* format contain a key indicating whether or not they are sorted. 
However, +* tag files in the original format do not contain such a key even when +* sorted, preventing this library from taking advantage of fast binary +* lookups. If the client knows that such an unmarked tag file is indeed +* sorted (or not), it can override the automatic detection. Note that +* incorrect lookup results will result if a tag file is marked as sorted when +* it actually is not. The function will return TagSuccess if called on an +* open tag file or TagFailure if not. +*/ +extern tagResult tagsSetSortType (tagFile *const file, const sortType type); + +/* +* Reads the first tag in the file, if any. It is passed the handle to an +* opened tag file and a (possibly null) pointer to a structure which, if not +* null, will be populated with information about the first tag file entry. +* The function will return TagSuccess if a tag entry is found, or +* TagFailure if not (i.e. it reached end of file). +*/ +extern tagResult tagsFirst (tagFile *const file, tagEntry *const entry); + +/* +* Step to the next tag in the file, if any. It is passed the handle to an +* opened tag file and a (possibly null) pointer to a structure which, if not +* null, will be populated with information about the next tag file entry. The +* function will return TagSuccess if another tag entry is found, or TagFailure +* if not (i.e. it reached end of file). It will always read the first tag in +* the file immediately after calling tagsOpen(). +*/ +extern tagResult tagsNext (tagFile *const file, tagEntry *const entry); + +/* +* Retrieve the value associated with the extension field for a specified key. +* It is passed a pointer to a structure already populated with values by a +* previous call to tagsNext(), tagsFind(), or tagsFindNext(), and a string +* containing the key of the desired extension field. If no such field of the +* specified key exists, the function will return null. 
+*/ +extern const char *tagsField (const tagEntry *const entry, const char *const key); + +/* +* Find the first tag matching `name'. The structure pointed to by `entry' +* will be populated with information about the tag file entry. If a tag file +* is sorted using the C locale, a binary search algorithm is used to search +* the tag file, resulting in very fast tag lookups, even in huge tag files. +* Various options controlling the matches can be combined by bit-wise or-ing +* certain values together. The available values are: +* +* TAG_PARTIALMATCH +* Tags whose leading characters match `name' will qualify. +* +* TAG_FULLMATCH +* Only tags whose full lengths match `name' will qualify. +* +* TAG_IGNORECASE +* Matching will be performed in a case-insensitive manner. A binary +* search is used only if the tag file is fold-sorted (TAG_FOLDSORTED). +* +* TAG_OBSERVECASE +* Matching will be performed in a case-sensitive manner. A binary +* search is used only if the tag file is sorted (TAG_SORTED). +* +* The function will return TagSuccess if a tag matching the name is found, or +* TagFailure if not. +*/ +extern tagResult tagsFind (tagFile *const file, tagEntry *const entry, const char *const name, const int options); + +/* +* Find the next tag matching the name and options supplied to the most recent +* call to tagsFind() for the same tag file. The structure pointed to by +* `entry' will be populated with information about the tag file entry. The +* function will return TagSuccess if another tag matching the name is found, +* or TagFailure if not. +*/ +extern tagResult tagsFindNext (tagFile *const file, tagEntry *const entry); + +/* +* Call tagsClose() at completion of reading the tag file, which will +* close the file and free any internal memory allocated. The function will +* return TagFailure if no file is currently open, TagSuccess otherwise. 
+*/ +extern tagResult tagsClose (tagFile *const file); + +#ifdef __cplusplus +}; +#endif + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/rexx.c b/rexx.c new file mode 100644 index 0000000..cb90f56 --- /dev/null +++ b/rexx.c @@ -0,0 +1,39 @@ +/* +* $Id: rexx.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2001-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for the REXX language +* (http://www.rexxla.org, http://www2.hursley.ibm.com/rexx). +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* always include first */ +#include "parse.h" /* always include */ + +/* +* FUNCTION DEFINITIONS +*/ + +static void installRexxRegex (const langType language) +{ + addTagRegex (language, "^([A-Za-z0-9@#$\\.!?_]+)[ \t]*:", + "\\1", "s,subroutine,subroutines", NULL); +} + +extern parserDefinition* RexxParser (void) +{ + static const char *const extensions [] = { "cmd", "rexx", "rx", NULL }; + parserDefinition* const def = parserNew ("REXX"); + def->extensions = extensions; + def->initialize = installRexxRegex; + def->regex = TRUE; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/routines.c b/routines.c new file mode 100644 index 0000000..83bcdcc --- /dev/null +++ b/routines.c @@ -0,0 +1,891 @@ +/* +* $Id: routines.c 536 2007-06-02 06:09:00Z elliotth $ +* +* Copyright (c) 2002-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains a loose assortment of shared functions. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#ifdef HAVE_STDLIB_H +# include /* to declare malloc (), realloc () */ +#endif +#include +#include +#include +#include +#include /* to declare tempnam(), and SEEK_SET (hopefully) */ + +#ifdef HAVE_FCNTL_H +# include /* to declare O_RDWR, O_CREAT, O_EXCL */ +#endif +#ifdef HAVE_UNISTD_H +# include /* to declare mkstemp () */ +#endif + +/* To declare "struct stat" and stat (). + */ +#if defined (HAVE_SYS_TYPES_H) +# include +#else +# if defined (HAVE_TYPES_H) +# include +# endif +#endif +#ifdef HAVE_SYS_STAT_H +# include +#else +# ifdef HAVE_STAT_H +# include +# endif +#endif + +#ifdef HAVE_DOS_H +# include /* to declare MAXPATH */ +#endif +#ifdef HAVE_DIRECT_H +# include /* to declare _getcwd */ +#endif +#ifdef HAVE_DIR_H +# include /* to declare findfirst() and findnext() */ +#endif +#ifdef HAVE_IO_H +# include /* to declare open() */ +#endif +#include "debug.h" +#include "routines.h" + +/* +* MACROS +*/ +#ifndef TMPDIR +# define TMPDIR "/tmp" +#endif + +/* File type tests. + */ +#ifndef S_ISREG +# if defined (S_IFREG) && ! defined (AMIGA) +# define S_ISREG(mode) ((mode) & S_IFREG) +# else +# define S_ISREG(mode) TRUE /* assume regular file */ +# endif +#endif + +#ifndef S_ISLNK +# ifdef S_IFLNK +# define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) +# else +# define S_ISLNK(mode) FALSE /* assume no soft links */ +# endif +#endif + +#ifndef S_ISDIR +# ifdef S_IFDIR +# define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) +# else +# define S_ISDIR(mode) FALSE /* assume no directories */ +# endif +#endif + +#ifndef S_IFMT +# define S_IFMT 0 +#endif + +#ifndef S_IXUSR +# define S_IXUSR 0 +#endif +#ifndef S_IXGRP +# define S_IXGRP 0 +#endif +#ifndef S_IXOTH +# define S_IXOTH 0 +#endif + +#ifndef S_IRUSR +# define S_IRUSR 0400 +#endif +#ifndef S_IWUSR +# define S_IWUSR 0200 +#endif + +#ifndef S_ISUID +# define S_ISUID 0 +#endif + +/* Hack for ridiculous practice of Microsoft Visual C++. 
+ */ +#if defined (WIN32) +# if defined (_MSC_VER) +# define stat _stat +# define getcwd _getcwd +# define currentdrive() (_getdrive() + 'A' - 1) +# define PATH_MAX _MAX_PATH +# elif defined (__BORLANDC__) +# define PATH_MAX MAXPATH +# define currentdrive() (getdisk() + 'A') +# elif defined (DJGPP) +# define currentdrive() (getdisk() + 'A') +# else +# define currentdrive() 'C' +# endif +#endif + +#ifndef PATH_MAX +# define PATH_MAX 256 +#endif + +/* + * Miscellaneous macros + */ +#define selected(var,feature) (((int)(var) & (int)(feature)) == (int)feature) + +/* +* DATA DEFINITIONS +*/ +#if defined (MSDOS_STYLE_PATH) +const char *const PathDelimiters = ":/\\"; +#elif defined (VMS) +const char *const PathDelimiters = ":]>"; +#endif + +char *CurrentDirectory; + +static const char *ExecutableProgram; +static const char *ExecutableName; + +/* +* FUNCTION PROTOTYPES +*/ +#ifdef NEED_PROTO_STAT +extern int stat (const char *, struct stat *); +#endif +#ifdef NEED_PROTO_LSTAT +extern int lstat (const char *, struct stat *); +#endif +#if defined (MSDOS) || defined (WIN32) || defined (VMS) || defined (__EMX__) || defined (AMIGA) +# define lstat(fn,buf) stat(fn,buf) +#endif + +/* +* FUNCTION DEFINITIONS +*/ + +extern void freeRoutineResources (void) +{ + if (CurrentDirectory != NULL) + eFree (CurrentDirectory); +} + +extern void setExecutableName (const char *const path) +{ + ExecutableProgram = path; + ExecutableName = baseFilename (path); +#ifdef VAXC +{ + /* remove filetype from executable name */ + char *p = strrchr (ExecutableName, '.'); + if (p != NULL) + *p = '\0'; +} +#endif +} + +extern const char *getExecutableName (void) +{ + return ExecutableName; +} + +extern const char *getExecutablePath (void) +{ + return ExecutableProgram; +} + +extern void error ( + const errorSelection selection, const char *const format, ...) +{ + va_list ap; + + va_start (ap, format); + fprintf (errout, "%s: %s", getExecutableName (), + selected (selection, WARNING) ? 
"Warning: " : ""); + vfprintf (errout, format, ap); + if (selected (selection, PERROR)) +#ifdef HAVE_STRERROR + fprintf (errout, " : %s", strerror (errno)); +#else + perror (" "); +#endif + fputs ("\n", errout); + va_end (ap); + if (selected (selection, FATAL)) + exit (1); +} + +/* + * Memory allocation functions + */ + +extern void *eMalloc (const size_t size) +{ + void *buffer = malloc (size); + + if (buffer == NULL) + error (FATAL, "out of memory"); + + return buffer; +} + +extern void *eCalloc (const size_t count, const size_t size) +{ + void *buffer = calloc (count, size); + + if (buffer == NULL) + error (FATAL, "out of memory"); + + return buffer; +} + +extern void *eRealloc (void *const ptr, const size_t size) +{ + void *buffer; + if (ptr == NULL) + buffer = eMalloc (size); + else + { + buffer = realloc (ptr, size); + if (buffer == NULL) + error (FATAL, "out of memory"); + } + return buffer; +} + +extern void eFree (void *const ptr) +{ + Assert (ptr != NULL); + free (ptr); +} + +/* + * String manipulation functions + */ + +/* + * Compare two strings, ignoring case. + * Return 0 for match, < 0 for smaller, > 0 for bigger + * Make sure case is folded to uppercase in comparison (like for 'sort -f') + * This makes a difference when one of the chars lies between upper and lower + * ie. one of the chars [ \ ] ^ _ ` for ascii. (The '_' in particular !) 
+ */ +extern int struppercmp (const char *s1, const char *s2) +{ + int result; + do + { + result = toupper ((int) *s1) - toupper ((int) *s2); + } while (result == 0 && *s1++ != '\0' && *s2++ != '\0'); + return result; +} + +extern int strnuppercmp (const char *s1, const char *s2, size_t n) +{ + int result; + do + { + result = toupper ((int) *s1) - toupper ((int) *s2); + } while (result == 0 && --n > 0 && *s1++ != '\0' && *s2++ != '\0'); + return result; +} + +#ifndef HAVE_STRSTR +extern char* strstr (const char *str, const char *substr) +{ + const size_t length = strlen (substr); + const char *match = NULL; + const char *p; + + for (p = str ; *p != '\0' && match == NULL ; ++p) + if (strncmp (p, substr, length) == 0) + match = p; + return (char*) match; +} +#endif + +extern char* eStrdup (const char* str) +{ + char* result = xMalloc (strlen (str) + 1, char); + strcpy (result, str); + return result; +} + +extern void toLowerString (char* str) +{ + while (*str != '\0') + { + *str = tolower ((int) *str); + ++str; + } +} + +extern void toUpperString (char* str) +{ + while (*str != '\0') + { + *str = toupper ((int) *str); + ++str; + } +} + +/* Newly allocated string containing lower case conversion of a string. + */ +extern char* newLowerString (const char* str) +{ + char* const result = xMalloc (strlen (str) + 1, char); + int i = 0; + do + result [i] = tolower ((int) str [i]); + while (str [i++] != '\0'); + return result; +} + +/* Newly allocated string containing upper case conversion of a string. 
+ */ +extern char* newUpperString (const char* str) +{ + char* const result = xMalloc (strlen (str) + 1, char); + int i = 0; + do + result [i] = toupper ((int) str [i]); + while (str [i++] != '\0'); + return result; +} + +/* + * File system functions + */ + +extern void setCurrentDirectory (void) +{ +#ifndef AMIGA + char* buf; +#endif + if (CurrentDirectory == NULL) + CurrentDirectory = xMalloc ((size_t) (PATH_MAX + 1), char); +#ifdef AMIGA + strcpy (CurrentDirectory, "."); +#else + buf = getcwd (CurrentDirectory, PATH_MAX); + if (buf == NULL) + perror (""); +#endif + if (CurrentDirectory [strlen (CurrentDirectory) - (size_t) 1] != + PATH_SEPARATOR) + { + sprintf (CurrentDirectory + strlen (CurrentDirectory), "%c", + OUTPUT_PATH_SEPARATOR); + } +} + +#ifdef AMIGA +static boolean isAmigaDirectory (const char *const name) +{ + boolean result = FALSE; + struct FileInfoBlock *const fib = xMalloc (1, struct FileInfoBlock); + if (fib != NULL) + { + const BPTR flock = Lock ((UBYTE *) name, (long) ACCESS_READ); + + if (flock != (BPTR) NULL) + { + if (Examine (flock, fib)) + result = ((fib->fib_DirEntryType >= 0) ? 
TRUE : FALSE); + UnLock (flock); + } + eFree (fib); + } + return result; +} +#endif + +/* For caching of stat() calls */ +extern fileStatus *eStat (const char *const fileName) +{ + struct stat status; + static fileStatus file; + if (file.name == NULL || strcmp (fileName, file.name) != 0) + { + eStatFree (&file); + file.name = eStrdup (fileName); + if (lstat (file.name, &status) != 0) + file.exists = FALSE; + else + { + file.isSymbolicLink = (boolean) S_ISLNK (status.st_mode); + if (file.isSymbolicLink && stat (file.name, &status) != 0) + file.exists = FALSE; + else + { + file.exists = TRUE; +#ifdef AMIGA + file.isDirectory = isAmigaDirectory (file.name); +#else + file.isDirectory = (boolean) S_ISDIR (status.st_mode); +#endif + file.isNormalFile = (boolean) (S_ISREG (status.st_mode)); + file.isExecutable = (boolean) ((status.st_mode & + (S_IXUSR | S_IXGRP | S_IXOTH)) != 0); + file.isSetuid = (boolean) ((status.st_mode & S_ISUID) != 0); + file.size = status.st_size; + } + } + } + return &file; +} + +extern void eStatFree (fileStatus *status) +{ + if (status->name != NULL) + { + eFree (status->name); + status->name = NULL; + } +} + +extern boolean doesFileExist (const char *const fileName) +{ + fileStatus *status = eStat (fileName); + return status->exists; +} + +extern boolean isRecursiveLink (const char* const dirName) +{ + boolean result = FALSE; + fileStatus *status = eStat (dirName); + if (status->isSymbolicLink) + { + char* const path = absoluteFilename (dirName); + while (path [strlen (path) - 1] == PATH_SEPARATOR) + path [strlen (path) - 1] = '\0'; + while (! 
result && strlen (path) > (size_t) 1) + { + char *const separator = strrchr (path, PATH_SEPARATOR); + if (separator == NULL) + break; + else if (separator == path) /* backed up to root directory */ + *(separator + 1) = '\0'; + else + *separator = '\0'; + result = isSameFile (path, dirName); + } + eFree (path); + } + return result; +} + +#ifndef HAVE_FGETPOS + +extern int fgetpos (FILE *stream, fpos_t *pos) +{ + int result = 0; + + *pos = ftell (stream); + if (*pos == -1L) + result = -1; + + return result; +} + +extern int fsetpos (FILE *stream, fpos_t const *pos) +{ + return fseek (stream, *pos, SEEK_SET); +} + +#endif + +/* + * Pathname manipulation (O/S dependent!!!) + */ + +static boolean isPathSeparator (const int c) +{ + boolean result; +#if defined (MSDOS_STYLE_PATH) || defined (VMS) + result = (boolean) (strchr (PathDelimiters, c) != NULL); +#else + result = (boolean) (c == PATH_SEPARATOR); +#endif + return result; +} + +#if ! defined (HAVE_STAT_ST_INO) + +static void canonicalizePath (char *const path __unused__) +{ +#if defined (MSDOS_STYLE_PATH) + char *p; + for (p = path ; *p != '\0' ; ++p) + if (isPathSeparator (*p) && *p != ':') + *p = PATH_SEPARATOR; +#endif +} + +#endif + +extern boolean isSameFile (const char *const name1, const char *const name2) +{ + boolean result = FALSE; +#if defined (HAVE_STAT_ST_INO) + struct stat stat1, stat2; + + if (stat (name1, &stat1) == 0 && stat (name2, &stat2) == 0) + result = (boolean) (stat1.st_ino == stat2.st_ino); +#else + { + char *const n1 = absoluteFilename (name1); + char *const n2 = absoluteFilename (name2); + canonicalizePath (n1); + canonicalizePath (n2); +# if defined (CASE_INSENSITIVE_FILENAMES) + result = (boolean) (strcasecmp (n1, n2) == 0); +#else + result = (boolean) (strcmp (n1, n2) == 0); +#endif + free (n1); + free (n2); + } +#endif + return result; +} + +extern const char *baseFilename (const char *const filePath) +{ +#if defined (MSDOS_STYLE_PATH) || defined (VMS) + const char *tail = NULL; + 
unsigned int i; + + /* Find whichever of the path delimiters is last. + */ + for (i = 0 ; i < strlen (PathDelimiters) ; ++i) + { + const char *sep = strrchr (filePath, PathDelimiters [i]); + + if (sep > tail) + tail = sep; + } +#else + const char *tail = strrchr (filePath, PATH_SEPARATOR); +#endif + if (tail == NULL) + tail = filePath; + else + ++tail; /* step past last delimiter */ +#ifdef VAXC + { + /* remove version number from filename */ + char *p = strrchr ((char *) tail, ';'); + if (p != NULL) + *p = '\0'; + } +#endif + + return tail; +} + +extern const char *fileExtension (const char *const fileName) +{ + const char *extension; + const char *pDelimiter = NULL; + const char *const base = baseFilename (fileName); +#ifdef QDOS + pDelimiter = strrchr (base, '_'); +#endif + if (pDelimiter == NULL) + pDelimiter = strrchr (base, '.'); + + if (pDelimiter == NULL) + extension = ""; + else + extension = pDelimiter + 1; /* skip to first char of extension */ + + return extension; +} + +extern boolean isAbsolutePath (const char *const path) +{ + boolean result = FALSE; +#if defined (MSDOS_STYLE_PATH) + if (isPathSeparator (path [0])) + result = TRUE; + else if (isalpha (path [0]) && path [1] == ':') + { + if (isPathSeparator (path [2])) + result = TRUE; + else + /* We don't support non-absolute file names with a drive + * letter, like `d:NAME' (it's too much hassle). 
+ */ + error (FATAL, + "%s: relative file names with drive letters not supported", + path); + } +#elif defined (VMS) + result = (boolean) (strchr (path, ':') != NULL); +#else + result = isPathSeparator (path [0]); +#endif + return result; +} + +extern vString *combinePathAndFile ( + const char *const path, const char *const file) +{ + vString *const filePath = vStringNew (); +#ifdef VMS + const char *const directoryId = strstr (file, ".DIR;1"); + + if (directoryId == NULL) + { + const char *const versionId = strchr (file, ';'); + + vStringCopyS (filePath, path); + if (versionId == NULL) + vStringCatS (filePath, file); + else + vStringNCatS (filePath, file, versionId - file); + vStringCopyToLower (filePath, filePath); + } + else + { + /* File really is a directory; append it to the path. + * Gotcha: doesn't work with logical names. + */ + vStringNCopyS (filePath, path, strlen (path) - 1); + vStringPut (filePath, '.'); + vStringNCatS (filePath, file, directoryId - file); + if (strchr (path, '[') != NULL) + vStringPut (filePath, ']'); + else + vStringPut (filePath, '>'); + vStringTerminate (filePath); + } +#else + const int lastChar = path [strlen (path) - 1]; + boolean terminated = isPathSeparator (lastChar); + + vStringCopyS (filePath, path); + if (! terminated) + { + vStringPut (filePath, OUTPUT_PATH_SEPARATOR); + vStringTerminate (filePath); + } + vStringCatS (filePath, file); +#endif + + return filePath; +} + +/* Return a newly-allocated string whose contents concatenate those of + * s1, s2, s3. + * Routine adapted from Gnu etags. 
+ */
+/* The three lengths are measured once and kept as size_t, so the allocation
+ * size and the copy offsets are computed in the same unsigned type.
+ */
+static char* concat (const char *s1, const char *s2, const char *s3)
+{
+	const size_t len1 = strlen (s1);
+	const size_t len2 = strlen (s2);
+	const size_t len3 = strlen (s3);
+	char *result = xMalloc (len1 + len2 + len3 + 1, char);
+
+	memcpy (result, s1, len1);
+	memcpy (result + len1, s2, len2);
+	memcpy (result + len1 + len2, s3, len3);
+	result [len1 + len2 + len3] = '\0';
+
+	return result;
+}
+
+/* Return a newly allocated string containing the absolute file name of FILE
+ * given CWD (which should end with a slash).
+ * Routine adapted from Gnu etags.
+ *
+ * "/." and "/dirname/.." components are removed in place. The caller owns the
+ * returned string. If everything collapses away, a fresh "/" is returned.
+ */
+extern char* absoluteFilename (const char *file)
+{
+	char *slashp, *cp;
+	char *res = NULL;
+	if (isAbsolutePath (file))
+	{
+#ifdef MSDOS_STYLE_PATH
+		if (file [1] == ':')
+			res = eStrdup (file);
+		else
+		{
+			char drive [3];
+			sprintf (drive, "%c:", currentdrive ());
+			res = concat (drive, file, "");
+		}
+#else
+		res = eStrdup (file);
+#endif
+	}
+	else
+		res = concat (CurrentDirectory, file, "");
+
+	/* Delete the "/dirname/.." and "/." substrings. */
+	slashp = strchr (res, PATH_SEPARATOR);
+	while (slashp != NULL  &&  slashp [0] != '\0')
+	{
+		if (slashp[1] == '.')
+		{
+			if (slashp [2] == '.' &&
+				(slashp [3] == PATH_SEPARATOR || slashp [3] == '\0'))
+			{
+				cp = slashp;
+				do
+					cp--;
+				while (cp >= res  &&  ! isAbsolutePath (cp));
+				if (cp < res)
+					cp = slashp;/* the absolute name begins with "/.." */
+#ifdef MSDOS_STYLE_PATH
+				/* Under MSDOS and NT we get `d:/NAME' as absolute file name,
+				 * so the luser could say `d:/../NAME'. We silently treat this
+				 * as `d:/NAME'.
+				 */
+				else if (cp [0] != PATH_SEPARATOR)
+					cp = slashp;
+#endif
+				/* Source and destination overlap inside `res', so this must
+				 * be memmove(); strcpy() on overlapping objects is undefined.
+				 */
+				memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
+				slashp = cp;
+				continue;
+			}
+			else if (slashp [2] == PATH_SEPARATOR  ||  slashp [2] == '\0')
+			{
+				/* Overlapping move, same reasoning as above. */
+				memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
+				continue;
+			}
+		}
+		slashp = strchr (slashp + 1, PATH_SEPARATOR);
+	}
+
+	if (res [0] == '\0')
+	{
+		/* Release the emptied buffer before handing back the replacement. */
+		eFree (res);
+		return eStrdup ("/");
+	}
+	else
+	{
+#ifdef MSDOS_STYLE_PATH
+		/* Canonicalize drive letter case. */
+		if (res [1] == ':'  &&  islower ((unsigned char) res [0]))
+			res [0] = (char) toupper ((unsigned char) res [0]);
+#endif
+
+		return res;
+	}
+}
+
+/* Return a newly allocated string containing the absolute file name of dir
+ * where `file' resides given `CurrentDirectory'.
+ * Routine adapted from Gnu etags.
+ *
+ * `file' is modified temporarily (cut after its last separator) and restored
+ * before returning, which is why it is not const.
+ */
+extern char* absoluteDirname (char *file)
+{
+	char *slashp, *res;
+	char save;
+	slashp = strrchr (file, PATH_SEPARATOR);
+	if (slashp == NULL)
+		res = eStrdup (CurrentDirectory);
+	else
+	{
+		save = slashp [1];
+		slashp [1] = '\0';
+		res = absoluteFilename (file);
+		slashp [1] = save;
+	}
+	return res;
+}
+
+/* Return a newly allocated string containing the file name of FILE relative
+ * to the absolute directory DIR (which should end with a slash).
+ * Routine adapted from Gnu etags.
+ *
+ * If the absolute form of FILE shares no leading character with DIR, that
+ * absolute form is returned instead.
+ */
+extern char* relativeFilename (const char *file, const char *dir)
+{
+	const char *fp, *dp;
+	char *absdir, *res;
+	int i;
+
+	/* Find the common root of file and dir (with a trailing slash).
+	 * Stop at the terminator: when both strings are identical the scan would
+	 * otherwise continue past the end of both buffers.
+	 */
+	absdir = absoluteFilename (file);
+	fp = absdir;
+	dp = dir;
+	while (*fp != '\0'  &&  *fp == *dp)
+	{
+		fp++;
+		dp++;
+	}
+	/* fp and dp now address the first differing char (or the terminator) */
+	do
+	{  /* look at the equal chars until path sep */
+		if (fp == absdir)
+			return absdir;  /* first char differs, give up */
+		fp--;
+		dp--;
+	} while (*fp != PATH_SEPARATOR);
+
+	/* Build a sequence of "../" strings for the resulting relative file name.
+	 */
+	i = 0;
+	while ((dp = strchr (dp + 1, PATH_SEPARATOR)) != NULL)
+		i += 1;
+	res = xMalloc (3 * i + strlen (fp + 1) + 1, char);
+	res [0] = '\0';
+	while (i-- > 0)
+		strcat (res, "../");
+
+	/* Add the file name relative to the common root of file and dir. */
+	strcat (res, fp + 1);
+	free (absdir);
+
+	return res;
+}
+
+/* Create and open a temporary file with the given stdio `mode'.
+ * The name chosen is stored in *pName, which must be NULL on entry.
+ * Every failure is reported through error (FATAL | PERROR, ...).
+ * With mkstemp(), $TMPDIR is consulted only when the executable is not
+ * setuid; otherwise the compiled-in TMPDIR is used.
+ */
+extern FILE *tempFile (const char *const mode, char **const pName)
+{
+	char *name;
+	FILE *fp;
+	int fd;
+#if defined(HAVE_MKSTEMP)
+	const char *const pattern = "tags.XXXXXX";
+	const char *tmpdir = NULL;
+	fileStatus *file = eStat (ExecutableProgram);
+	if (! file->isSetuid)
+		tmpdir = getenv ("TMPDIR");
+	if (tmpdir == NULL)
+		tmpdir = TMPDIR;
+	/* directory + separator + pattern + terminator */
+	name = xMalloc (strlen (tmpdir) + 1 + strlen (pattern) + 1, char);
+	sprintf (name, "%s%c%s", tmpdir, OUTPUT_PATH_SEPARATOR, pattern);
+	fd = mkstemp (name);
+	eStatFree (file);
+#elif defined(HAVE_TEMPNAM)
+	name = tempnam (TMPDIR, "tags");
+	if (name == NULL)
+		error (FATAL | PERROR, "cannot allocate temporary file name");
+	fd = open (name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+#else
+	name = xMalloc (L_tmpnam, char);
+	if (tmpnam (name) != name)
+		error (FATAL | PERROR, "cannot assign temporary file name");
+	fd = open (name, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+#endif
+	if (fd == -1)
+		error (FATAL | PERROR, "cannot open temporary file");
+	fp = fdopen (fd, mode);
+	if (fp == NULL)
+		error (FATAL | PERROR, "cannot open temporary file");
+	DebugStatement (
+		debugPrintf (DEBUG_STATUS, "opened temporary file %s\n", name); )
+	Assert (*pName == NULL);
+	*pName = name;
+	return fp;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/routines.h b/routines.h
new file mode 100644
index 0000000..c623e17
--- /dev/null
+++ b/routines.h
@@ -0,0 +1,134 @@
+/*
+*   $Id: routines.h 536 2007-06-02 06:09:00Z elliotth $
+*
+*   Copyright (c) 2002, Darren Hiebert
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   External interface to routines.c
+*/
+#ifndef _ROUTINES_H
+#define _ROUTINES_H
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+/*
+*   MACROS
+*/
+/* Typed allocation wrappers: `n' is an element count, not a byte count.
+ * The count is converted to size_t before multiplying, and each expansion is
+ * parenthesised so it can be used inside a larger expression.
+ */
+#define xMalloc(n,Type)    ((Type *)eMalloc((size_t)(n) * sizeof (Type)))
+#define xCalloc(n,Type)    ((Type *)eCalloc((size_t)(n), sizeof (Type)))
+#define xRealloc(p,n,Type) ((Type *)eRealloc((p), (size_t)(n) * sizeof (Type)))
+
+/*
+ *  Portability macros
+ */
+#ifndef PATH_SEPARATOR
+# if defined (MSDOS_STYLE_PATH)
+#  define PATH_SEPARATOR '\\'
+# elif defined (QDOS)
+#  define PATH_SEPARATOR '_'
+# else
+#  define PATH_SEPARATOR '/'
+# endif
+#endif
+
+/* Separator used when composing new paths; can be forced to '/' on
+ * MSDOS-style builds by defining UNIX_PATH_SEPARATOR.
+ */
+#if defined (MSDOS_STYLE_PATH) && defined (UNIX_PATH_SEPARATOR)
+# define OUTPUT_PATH_SEPARATOR	'/'
+#else
+# define OUTPUT_PATH_SEPARATOR	PATH_SEPARATOR
+#endif
+
+/*
+*   DATA DECLARATIONS
+*/
+#if defined (MSDOS_STYLE_PATH) || defined (VMS)
+extern const char *const PathDelimiters;
+#endif
+extern char *CurrentDirectory;
+typedef int errorSelection;
+/* Bit flags; combined with `|' when passed to error (). */
+enum eErrorTypes { FATAL = 1, WARNING = 2, PERROR = 4 };
+
+typedef struct {
+		/* Name of file for which status is valid */
+	char* name;
+
+		/* Does file exist? If not, members below do not contain valid data. */
+	boolean exists;
+
+		/* is file path a symbolic link to another file? */
+	boolean isSymbolicLink;
+
+		/* Is file (pointed to) a directory? */
+	boolean isDirectory;
+
+		/* Is file (pointed to) a normal file? */
+	boolean isNormalFile;
+
+		/* Is file (pointed to) executable? */
+	boolean isExecutable;
+
+		/* Is file (pointed to) setuid? */
+	boolean isSetuid;
+
+		/* Size of file (pointed to) */
+	unsigned long size;
+} fileStatus;
+
+/*
+*   FUNCTION PROTOTYPES
+*/
+extern void freeRoutineResources (void);
+extern void setExecutableName (const char *const path);
+extern const char *getExecutableName (void);
+extern const char *getExecutablePath (void);
+extern void error (const errorSelection selection, const char *const format, ...) __printf__ (2, 3);
+
+/* Memory allocation functions */
+#ifdef NEED_PROTO_MALLOC
+extern void *malloc (size_t);
+extern void *realloc (void *ptr, size_t);
+#endif
+extern void *eMalloc (const size_t size);
+extern void *eCalloc (const size_t count, const size_t size);
+extern void *eRealloc (void *const ptr, const size_t size);
+extern void eFree (void *const ptr);
+
+/* String manipulation functions */
+extern int struppercmp (const char *s1, const char *s2);
+extern int strnuppercmp (const char *s1, const char *s2, size_t n);
+#ifndef HAVE_STRSTR
+extern char* strstr (const char *str, const char *substr);
+#endif
+extern char* eStrdup (const char* str);
+extern void toLowerString (char* str);
+extern void toUpperString (char* str);
+extern char* newLowerString (const char* str);
+extern char* newUpperString (const char* str);
+
+/* File system functions */
+extern void setCurrentDirectory (void);
+extern fileStatus *eStat (const char *const fileName);
+extern void eStatFree (fileStatus *status);
+extern boolean doesFileExist (const char *const fileName);
+extern boolean isRecursiveLink (const char* const dirName);
+extern boolean isSameFile (const char *const name1, const char *const name2);
+#if defined(NEED_PROTO_FGETPOS)
+/* fsetpos () takes a pointer to const, matching both the fallback definition
+ * in routines.c and the standard C signature.
+ */
+extern int fgetpos  (FILE *stream, fpos_t *pos);
+extern int fsetpos  (FILE *stream, fpos_t const *pos);
+#endif
+extern const char *baseFilename (const char *const filePath);
+extern const char *fileExtension (const char *const fileName);
+extern boolean isAbsolutePath (const char *const path);
+extern vString *combinePathAndFile (const char *const path, const char *const file);
+extern char* absoluteFilename (const char *file);
+extern char* absoluteDirname (char *file);
+extern char* relativeFilename (const char *file, const char *dir);
+extern FILE *tempFile (const char *const mode, char **const pName);
+
+#endif  /* _ROUTINES_H */
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/ruby.c b/ruby.c
new file mode 100644
index 0000000..8001ec7
--- 
/dev/null
+++ b/ruby.c
@@ -0,0 +1,408 @@
+/*
+*   $Id: ruby.c 571 2007-06-24 23:32:14Z elliotth $
+*
+*   Copyright (c) 2000-2001, Thaddeus Covert
+*   Copyright (c) 2002 Matthias Veit
+*   Copyright (c) 2004 Elliott Hughes
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions for generating tags for Ruby language
+*   files.
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+#include <string.h>  /* strlen (), strncmp (), strchr () */
+
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+*   DATA DECLARATIONS
+*/
+/* The non-negative values index RubyKinds below; keep the two in step. */
+typedef enum {
+	K_UNDEFINED = -1, K_CLASS, K_METHOD, K_MODULE, K_SINGLETON
+} rubyKind;
+
+/*
+*   DATA DEFINITIONS
+*/
+static kindOption RubyKinds [] = {
+	{ TRUE, 'c', "class", "classes" },
+	{ TRUE, 'f', "method", "methods" },
+	{ TRUE, 'm', "module", "modules" },
+	{ TRUE, 'F', "singleton method", "singleton methods" }
+};
+
+/* Stack of currently open scopes; created and destroyed by findRubyTags (). */
+static stringList* nesting = 0;
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/*
+* Returns a string describing the scope in 'list'.
+* We record the current scope as a list of entered scopes.
+* Scopes corresponding to 'if' statements and the like are
+* represented by empty strings. Scopes corresponding to
+* modules and classes are represented by the name of the
+* module or class.
+*
+* The non-empty entries are joined with '.'. The caller owns the
+* returned vString.
+*/
+static vString* stringListToScope (const stringList* list)
+{
+	unsigned int i;
+	unsigned int chunks_output = 0;
+	vString* result = vStringNew ();
+	const unsigned int max = stringListCount (list);
+	for (i = 0; i < max; ++i)
+	{
+		vString* chunk = stringListItem (list, i);
+		if (vStringLength (chunk) > 0)
+		{
+			/* Separator goes before every chunk except the first one. */
+			vStringCatS (result, (chunks_output++ > 0) ? "." : "");
+			vStringCatS (result, vStringValue (chunk));
+		}
+	}
+	return result;
+}
+
+/*
+* Attempts to advance 's' past 'literal'.
+* Returns TRUE if it did, FALSE (and leaves 's' where
+* it was) otherwise.
+*/
+static boolean canMatch (const unsigned char** s, const char* literal)
+{
+	const size_t literal_length = strlen (literal);
+	unsigned char next_char;
+
+	/* Compare first. Only after a full match is it known that the input
+	 * holds at least 'literal_length' characters, so that the character
+	 * following them (at worst the terminator) may be read.
+	 */
+	if (strncmp ((const char*) *s, literal, literal_length) != 0)
+	{
+		return FALSE;
+	}
+	/* Additionally check that we're at the end of a token. */
+	next_char = *(*s + literal_length);
+	if ( ! (next_char == 0 || isspace (next_char) || next_char == '('))
+	{
+		return FALSE;
+	}
+	*s += literal_length;
+	return TRUE;
+}
+
+/*
+* Attempts to advance 'cp' past a Ruby operator method name. Returns
+* TRUE if successful (and copies the name into 'name'), FALSE otherwise.
+*
+* Candidates are tried in table order. Because canMatch () insists on an
+* end of token after the literal, a short operator listed before a longer
+* one with the same prefix ("[]" before "[]=") does not shadow it.
+*/
+static boolean parseRubyOperator (vString* name, const unsigned char** cp)
+{
+	static const char* RUBY_OPERATORS[] = {
+		"[]", "[]=",
+		"**",
+		"!", "~", "+@", "-@",
+		"*", "/", "%",
+		"+", "-",
+		">>", "<<",
+		"&",
+		"^", "|",
+		"<=", "<", ">", ">=",
+		"<=>", "==", "===", "!=", "=~", "!~",
+		"`",
+		0
+	};
+	int i;
+	for (i = 0; RUBY_OPERATORS[i] != 0; ++i)
+	{
+		if (canMatch (cp, RUBY_OPERATORS[i]))
+		{
+			vStringCatS (name, RUBY_OPERATORS[i]);
+			return TRUE;
+		}
+	}
+	return FALSE;
+}
+
+/*
+* Emits a tag for the given 'name' of kind 'kind' at the current nesting.
+*
+* 'kind' indexes RubyKinds and so must not be K_UNDEFINED. A copy of the
+* name is pushed onto 'nesting' as the newly opened scope, and 'name' is
+* cleared before returning.
+*/
+static void emitRubyTag (vString* name, rubyKind kind)
+{
+	tagEntryInfo tag;
+	vString* scope;
+
+	vStringTerminate (name);
+	scope = stringListToScope (nesting);
+
+	initTagEntry (&tag, vStringValue (name));
+	if (vStringLength (scope) > 0) {
+		tag.extensionFields.scope [0] = "class";
+		tag.extensionFields.scope [1] = vStringValue (scope);
+	}
+	tag.kindName = RubyKinds [kind].name;
+	tag.kind = RubyKinds [kind].letter;
+	makeTagEntry (&tag);
+
+	stringListAdd (nesting, vStringNewCopy (name));
+
+	vStringClear (name);
+	/* 'scope' is referenced by 'tag' until makeTagEntry () above is done. */
+	vStringDelete (scope);
+}
+
+/* Tests whether 'ch' is a character in 'list'.
+ * Note: strchr () also matches the terminator, so a 'ch' of '\0' yields TRUE;
+ * callers must test for end of input themselves.
+ */
+static boolean charIsIn (char ch, const char* list)
+{
+	return (strchr (list, ch) != 0);
+}
+
+/* Advances 'cp' over leading whitespace.
*/ +static void skipWhitespace (const unsigned char** cp) +{ + while (isspace (**cp)) + { + ++*cp; + } +} + +/* +* Copies the characters forming an identifier from *cp into +* name, leaving *cp pointing to the character after the identifier. +*/ +static rubyKind parseIdentifier ( + const unsigned char** cp, vString* name, rubyKind kind) +{ + /* Method names are slightly different to class and variable names. + * A method name may optionally end with a question mark, exclamation + * point or equals sign. These are all part of the name. + * A method name may also contain a period if it's a singleton method. + */ + const char* also_ok = (kind == K_METHOD) ? "_.?!=" : "_"; + + skipWhitespace (cp); + + /* Check for an anonymous (singleton) class such as "class << HTTP". */ + if (kind == K_CLASS && **cp == '<' && *(*cp + 1) == '<') + { + return K_UNDEFINED; + } + + /* Check for operators such as "def []=(key, val)". */ + if (kind == K_METHOD || kind == K_SINGLETON) + { + if (parseRubyOperator (name, cp)) + { + return kind; + } + } + + /* Copy the identifier into 'name'. */ + while (**cp != 0 && (isalnum (**cp) || charIsIn (**cp, also_ok))) + { + char last_char = **cp; + + vStringPut (name, last_char); + ++*cp; + + if (kind == K_METHOD) + { + /* Recognize singleton methods. */ + if (last_char == '.') + { + vStringTerminate (name); + vStringClear (name); + return parseIdentifier (cp, name, K_SINGLETON); + } + + /* Recognize characters which mark the end of a method name. */ + if (charIsIn (last_char, "?!=")) + { + break; + } + } + } + return kind; +} + +static void readAndEmitTag (const unsigned char** cp, rubyKind expected_kind) +{ + if (isspace (**cp)) + { + vString *name = vStringNew (); + rubyKind actual_kind = parseIdentifier (cp, name, expected_kind); + + if (actual_kind == K_UNDEFINED || vStringLength (name) == 0) + { + /* + * What kind of tags should we create for code like this? 
+ * + * %w(self.clfloor clfloor).each do |name| + * module_eval <<-"end;" + * def #{name}(x, y=1) + * q, r = x.divmod(y) + * q = q.to_i + * return q, r + * end + * end; + * end + * + * Or this? + * + * class << HTTP + * + * For now, we don't create any. + */ + } + else + { + emitRubyTag (name, actual_kind); + } + vStringDelete (name); + } +} + +static void enterUnnamedScope (void) +{ + stringListAdd (nesting, vStringNewInit ("")); +} + +static void findRubyTags (void) +{ + const unsigned char *line; + boolean inMultiLineComment = FALSE; + + nesting = stringListNew (); + + /* FIXME: this whole scheme is wrong, because Ruby isn't line-based. + * You could perfectly well write: + * + * def + * method + * puts("hello") + * end + * + * if you wished, and this function would fail to recognize anything. + */ + while ((line = fileReadLine ()) != NULL) + { + const unsigned char *cp = line; + + if (canMatch (&cp, "=begin")) + { + inMultiLineComment = TRUE; + continue; + } + if (canMatch (&cp, "=end")) + { + inMultiLineComment = FALSE; + continue; + } + + skipWhitespace (&cp); + + /* Avoid mistakenly starting a scope for modifiers such as + * + * return if + * + * FIXME: this is fooled by code such as + * + * result = if + * + * else + * + * end + * + * FIXME: we're also fooled if someone does something heinous such as + * + * puts("hello") \ + * unless + */ + if (canMatch (&cp, "case") || canMatch (&cp, "for") || + canMatch (&cp, "if") || canMatch (&cp, "unless") || + canMatch (&cp, "while")) + { + enterUnnamedScope (); + } + + /* + * "module M", "class C" and "def m" should only be at the beginning + * of a line. + */ + if (canMatch (&cp, "module")) + { + readAndEmitTag (&cp, K_MODULE); + } + else if (canMatch (&cp, "class")) + { + readAndEmitTag (&cp, K_CLASS); + } + else if (canMatch (&cp, "def")) + { + readAndEmitTag (&cp, K_METHOD); + } + + while (*cp != '\0') + { + /* FIXME: we don't cope with here documents, + * or regular expression literals, or ... you get the idea. 
+ * Hopefully, the restriction above that insists on seeing + * definitions at the starts of lines should keep us out of + * mischief. + */ + if (inMultiLineComment || isspace (*cp)) + { + ++cp; + } + else if (*cp == '#') + { + /* FIXME: this is wrong, but there *probably* won't be a + * definition after an interpolated string (where # doesn't + * mean 'comment'). + */ + break; + } + else if (canMatch (&cp, "begin") || canMatch (&cp, "do")) + { + enterUnnamedScope (); + } + else if (canMatch (&cp, "end") && stringListCount (nesting) > 0) + { + /* Leave the most recent scope. */ + vStringDelete (stringListLast (nesting)); + stringListRemoveLast (nesting); + } + else if (*cp == '"') + { + /* Skip string literals. + * FIXME: should cope with escapes and interpolation. + */ + do { + ++cp; + } while (*cp != 0 && *cp != '"'); + } + else if (*cp != '\0') + { + do + ++cp; + while (isalnum (*cp) || *cp == '_'); + } + } + } + stringListDelete (nesting); +} + +extern parserDefinition* RubyParser (void) +{ + static const char *const extensions [] = { "rb", "ruby", NULL }; + parserDefinition* def = parserNew ("Ruby"); + def->kinds = RubyKinds; + def->kindCount = KIND_COUNT (RubyKinds); + def->extensions = extensions; + def->parser = findRubyTags; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/scheme.c b/scheme.c new file mode 100644 index 0000000..e7f61f4 --- /dev/null +++ b/scheme.c @@ -0,0 +1,111 @@ +/* +* $Id: scheme.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2000-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions for generating tags for Scheme language +* files. 
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+#include <string.h>
+
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+*   DATA DEFINITIONS
+*/
+/* Values index SchemeKinds below. */
+typedef enum {
+	K_FUNCTION, K_SET
+} schemeKind;
+
+static kindOption SchemeKinds [] = {
+	{ TRUE, 'f', "function", "functions" },
+	{ TRUE, 's', "set",      "sets" }
+};
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/* Algorithm adapted from GNU etags.
+ * Scheme tag functions
+ * look for (def... xyzzy
+ * look for (def... (xyzzy
+ * look for (def ... ((... (xyzzy ....
+ * look for (set! xyzzy
+ */
+/* Copies into 'name' the characters at 'cp' up to (not including) the next
+ * whitespace, parenthesis or end of line.
+ */
+static void readIdentifier (vString *const name, const unsigned char *cp)
+{
+	const unsigned char *p;
+	vStringClear (name);
+	/* Go till you get to white space or a syntactic break */
+	for (p = cp; *p != '\0' && *p != '(' && *p != ')' && !isspace (*p); p++)
+		vStringPut (name, (int) *p);
+	vStringTerminate (name);
+}
+
+/* Parser entry point: a line starting with "(def" (any case) yields a
+ * function tag, and one starting with "(set!" plus whitespace a set tag.
+ */
+static void findSchemeTags (void)
+{
+	vString *name = vStringNew ();
+	const unsigned char *line;
+
+	while ((line = fileReadLine ()) != NULL)
+	{
+		const unsigned char *cp = line;
+
+		if (cp [0] == '(' &&
+			(cp [1] == 'D' || cp [1] == 'd') &&
+			(cp [2] == 'E' || cp [2] == 'e') &&
+			(cp [3] == 'F' || cp [3] == 'f'))
+		{
+			/* Skip the rest of the "(def..." word. The end-of-line test
+			 * matters for a line such as "(define" with nothing after it.
+			 */
+			while (*cp != '\0' && !isspace (*cp))
+				cp++;
+			/* Skip over open parens and white space */
+			while (*cp != '\0' && (isspace (*cp) || *cp == '('))
+				cp++;
+			readIdentifier (name, cp);
+			makeSimpleTag (name, SchemeKinds, K_FUNCTION);
+		}
+		if (cp [0] == '(' &&
+			(cp [1] == 'S' || cp [1] == 's') &&
+			(cp [2] == 'E' || cp [2] == 'e') &&
+			(cp [3] == 'T' || cp [3] == 't') &&
+			cp [4] == '!' &&
+			(isspace (cp [5])))
+		{
+			while (*cp != '\0' && !isspace (*cp))
+				cp++;
+			/* Skip over white space */
+			while (isspace (*cp))
+				cp++;
+			readIdentifier (name, cp);
+			makeSimpleTag (name, SchemeKinds, K_SET);
+		}
+	}
+	vStringDelete (name);
+}
+
+extern parserDefinition* SchemeParser (void)
+{
+	static const char *const extensions [] = {
+		"SCM", "SM", "sch", "scheme", "scm", "sm", NULL
+	};
+	parserDefinition* def = parserNew ("Scheme");
+	def->kinds      = SchemeKinds;
+	def->kindCount  = KIND_COUNT (SchemeKinds);
+	def->extensions = extensions;
+	def->parser     = findSchemeTags;
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/sh.c b/sh.c
new file mode 100644
index 0000000..440ed85
--- /dev/null
+++ b/sh.c
@@ -0,0 +1,115 @@
+/*
+*   $Id: sh.c 443 2006-05-30 04:37:13Z darren $
+*
+*   Copyright (c) 2000-2002, Darren Hiebert
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions for generating tags for scripts for the
+*   Bourne shell (and its derivatives, the Korn and Z shells).
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+#include <string.h>
+
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+*   DATA DEFINITIONS
+*/
+typedef enum {
+	K_FUNCTION
+} shKind;
+
+static kindOption ShKinds [] = {
+	{ TRUE, 'f', "function", "functions"}
+};
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/*  Reject any tag "main" from a file named "configure". These appear in
+ *  here-documents in GNU autoconf scripts and will add a haystack to the
+ *  needle.
+ */
+static boolean hackReject (const vString* const tagName)
+{
+	const char *const script = baseFilename (vStringValue (File.name));
+
+	/* Only the pairing of file "configure" with tag "main" is rejected. */
+	if (strcmp (script, "configure") != 0)
+		return FALSE;
+	return (boolean) (strcmp (vStringValue (tagName), "main") == 0);
+}
+
+/* Parser entry point. A tag is made for a name introduced by the "function"
+ * keyword, or for a name followed by "()" (blanks allowed around the
+ * parentheses) unless hackReject () vetoes it. Lines whose first character
+ * is '#' are skipped.
+ */
+static void findShTags (void)
+{
+	vString *const tagName = vStringNew ();
+	const unsigned char *text;
+
+	for (text = fileReadLine () ; text != NULL ; text = fileReadLine ())
+	{
+		const unsigned char *p = text;
+		boolean isFunction = FALSE;
+
+		if (text [0] == '#')
+			continue;
+
+		while (isspace (*p))
+			p++;
+
+		/* Optional "function" keyword, which must be followed by a blank. */
+		if (strncmp ((const char*) p, "function", (size_t) 8) == 0  &&
+			isspace ((int) p [8]))
+		{
+			isFunction = TRUE;
+			p += 8;
+			while (isspace ((int) *p))
+				++p;
+		}
+
+		/* Anything that does not start a name here ends the line. */
+		if (isalnum ((int) *p)  ||  *p == '_')
+		{
+			do
+			{
+				vStringPut (tagName, (int) *p);
+				++p;
+			} while (isalnum ((int) *p)  ||  *p == '_');
+			vStringTerminate (tagName);
+
+			while (isspace ((int) *p))
+				++p;
+			if (*p == '(')
+			{
+				++p;
+				while (isspace ((int) *p))
+					++p;
+				if (*p == ')'  &&  ! hackReject (tagName))
+					isFunction = TRUE;
+			}
+			if (isFunction)
+				makeSimpleTag (tagName, ShKinds, K_FUNCTION);
+			vStringClear (tagName);
+		}
+	}
+	vStringDelete (tagName);
+}
+
+/* Builds the parser definition for the "Sh" language. */
+extern parserDefinition* ShParser (void)
+{
+	static const char *const extensions [] = {
+		"sh", "SH", "bsh", "bash", "ksh", "zsh", NULL
+	};
+	parserDefinition* const parser = parserNew ("Sh");
+	parser->parser     = findShTags;
+	parser->extensions = extensions;
+	parser->kindCount  = KIND_COUNT (ShKinds);
+	parser->kinds      = ShKinds;
+	return parser;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/slang.c b/slang.c
new file mode 100644
index 0000000..74c50c3
--- /dev/null
+++ b/slang.c
@@ -0,0 +1,41 @@
+/*
+ *  $Id: slang.c 443 2006-05-30 04:37:13Z darren $
+ *
+ *  Copyright (c) 2000-2001, Francesc Rocher
+ *
+ *  Author: Francesc Rocher .
+ *
+ *  This source code is released for free distribution under the terms of the
+ *  GNU General Public License.
+ *
+ *   This module contains functions for generating tags for S-Lang files.
+ */
+
+/*
+ *   INCLUDE FILES
+ */
+#include "general.h"  /* must always come first */
+#include "parse.h"
+
+/*
+ *   FUNCTION DEFINITIONS
+ */
+/* Registers the two tag patterns for S-Lang: "define NAME" as a function
+ * (registered with the "i" flag) and implements ("NAME") as a namespace.
+ */
+static void installSlangRegex (const langType language)
+{
+	static const char *const functionPattern =
+		"^.*define[ \t]+([A-Z_][A-Z0-9_]*)[^;]*$";
+	static const char *const namespacePattern =
+		"^[ \t]*implements[ \t]+\\([ \t]*\"([^\"]*)\"[ \t]*\\)[ \t]*;";
+
+	addTagRegex (language, functionPattern,
+		"\\1", "f,function,functions", "i");
+	addTagRegex (language, namespacePattern,
+		"\\1", "n,namespace,namespaces", NULL);
+}
+
+/* Builds the regex-driven parser definition for the "SLang" language. */
+extern parserDefinition* SlangParser (void)
+{
+	static const char *const extensions [] = { "sl", NULL };
+	parserDefinition* const parser = parserNew ("SLang");
+	parser->regex      = TRUE;
+	parser->initialize = installSlangRegex;
+	parser->extensions = extensions;
+	return parser;
+}
diff --git a/sml.c b/sml.c
new file mode 100644
index 0000000..9fbb21b
--- /dev/null
+++ b/sml.c
@@ -0,0 +1,212 @@
+/*
+*   $Id: sml.c 536 2007-06-02 06:09:00Z elliotth $
+*
+*   Copyright (c) 2002, Venkatesh Prasad Ranganath and Darren Hiebert
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions for generating tags for SML language files.
+*/
+
+/*
+ *   INCLUDE FILES
+ */
+#include "general.h"  /* must always come first */
+
+#include <string.h>
+
+#include "entry.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+ *   DATA DECLARATIONS
+ */
+/* The non-negative values index SmlKinds below. K_AND and K_NONE are
+ * markers only and must never be used as an index.
+ */
+typedef enum {
+	K_AND = -2,
+	K_NONE = -1,
+	K_EXCEPTION,
+	K_FUNCTION,
+	K_FUNCTOR,
+	K_SIGNATURE,
+	K_STRUCTURE,
+	K_TYPE,
+	K_VAL
+} smlKind;
+
+/*
+ *   DATA DEFINITIONS
+ */
+static kindOption SmlKinds[] = {
+	{ TRUE, 'e', "exception", "exception declarations" },
+	{ TRUE, 'f', "function",  "function definitions" },
+	{ TRUE, 'c', "functor",   "functor definitions" },
+	{ TRUE, 's', "signature", "signature declarations" },
+	{ TRUE, 'r', "structure", "structure declarations" },
+	{ TRUE, 't', "type",      "type definitions" },
+	{ TRUE, 'v', "value",     "value bindings" }
+};
+
+/* Keywords that introduce a declaration, with the kind each one produces. */
+static struct {
+	const char *keyword;
+	smlKind kind;
+} SmlKeywordTypes [] = {
+	{ "abstype",   K_TYPE },
+	{ "and",       K_AND },
+	{ "datatype",  K_TYPE },
+	{ "exception", K_EXCEPTION },
+	{ "functor",   K_FUNCTOR },
+	{ "fun",       K_FUNCTION },
+	{ "signature", K_SIGNATURE },
+	{ "structure", K_STRUCTURE },
+	{ "type",      K_TYPE },
+	{ "val",       K_VAL }
+};
+
+/* Count of comments still open; persists across lines of one input file. */
+static unsigned int CommentLevel = 0;
+
+/*
+ *   FUNCTION DEFINITIONS
+ */
+
+/* Emits a tag called 'name'; 'type' must be a valid index into SmlKinds. */
+static void makeSmlTag (smlKind type, vString *name)
+{
+	tagEntryInfo tag;
+	initTagEntry (&tag, vStringValue (name));
+	tag.kindName = SmlKinds [type].name;
+	tag.kind = SmlKinds [type].letter;
+	makeTagEntry (&tag);
+}
+
+static const unsigned char *skipSpace (const unsigned char *cp)
+{
+	while (isspace ((int) *cp))
+		++cp;
+	return cp;
+}
+
+/* Tests whether 'c' may appear in an identifier: alphanumerics plus the
+ * symbolic characters listed below.
+ */
+static boolean isIdentifier (int c)
+{
+	boolean result = FALSE;
+	/* Consider '_' as a delimiter to aid the user in tracking its usage.
+	 * NOTE(review): '_' is in the list below and is therefore accepted as
+	 * an identifier character, which seems at odds with the sentence above.
+	 */
+	const char *const alternateIdentifiers = "!%&$#+-<>=/?@\\~'^|*_";
+	if (isalnum (c))
+		result = TRUE;
+	else if (c != '\0' && strchr (alternateIdentifiers, c) != NULL)
+		result = TRUE;
+	return result;
+}
+
+/* Skips non-identifier characters, then copies the identifier that follows
+ * into 'identifier'. Returns the position after it. If "(*" is met while
+ * skipping, CommentLevel is incremented and the position just after the
+ * opener is returned, with 'identifier' left empty.
+ */
+static const unsigned char *parseIdentifier (
+		const unsigned char *cp, vString *const identifier)
+{
+	boolean stringLit = FALSE;
+	vStringClear (identifier);
+	while (*cp != '\0'  &&  (!isIdentifier ((int) *cp) || stringLit))
+	{
+		int oneback = *cp;
+		cp++;
+		if (oneback == '('  &&  *cp == '*'  &&  stringLit == FALSE)
+		{
+			CommentLevel++;
+			return ++cp;
+		}
+		if (*cp == '"' && oneback != '\\')
+		{
+			stringLit = TRUE;
+			continue;
+		}
+		/* NOTE(review): the test above already takes every unescaped '"',
+		 * so this reset looks unreachable -- confirm the intended
+		 * string-literal handling.
+		 */
+		if (stringLit && *cp == '"' && oneback != '\\')
+			stringLit = FALSE;
+	}
+	/* End of line: nothing to copy. 'cp' cannot be NULL here, as it has
+	 * already been dereferenced above.
+	 */
+	if (*cp == '\0')
+		return cp;
+
+	while (isIdentifier ((int) *cp))
+	{
+		vStringPut (identifier, (int) *cp);
+		cp++;
+	}
+	vStringTerminate (identifier);
+	return cp;
+}
+
+/* Reads the next identifier at *cp, advances *cp past it, and returns the
+ * kind of the keyword it names, or K_NONE if it is not a keyword.
+ */
+static smlKind findNextIdentifier (const unsigned char **cp)
+{
+	smlKind result = K_NONE;
+	vString *const identifier = vStringNew ();
+	unsigned int count = sizeof (SmlKeywordTypes) / sizeof (SmlKeywordTypes [0]);
+	unsigned int i;
+	*cp = parseIdentifier (*cp, identifier);
+	for (i = 0  ;  i < count  &&  result == K_NONE ;  ++i)
+	{
+		const char *id = vStringValue (identifier);
+		if (strcmp (id, SmlKeywordTypes [i].keyword) == 0)
+			result = SmlKeywordTypes [i].kind;
+	}
+	vStringDelete (identifier);
+	return result;
+}
+
+/* Parser entry point: scans each line for declaration keywords and tags the
+ * identifier that follows. "and" repeats the kind of the previous tag.
+ */
+static void findSmlTags (void)
+{
+	vString *const identifier = vStringNew ();
+	const unsigned char *line;
+	smlKind lastTag = K_NONE;
+
+	/* Start every file outside any comment; without this, an unterminated
+	 * comment in one file would carry over into the next.
+	 */
+	CommentLevel = 0;
+
+	while ((line = fileReadLine ()) != NULL)
+	{
+		const unsigned char *cp = skipSpace (line);
+		do
+		{
+			smlKind foundTag;
+			if (CommentLevel != 0)
+			{
+				cp = (const unsigned char *) strstr ((const char *) cp, "*)");
+				if (cp == NULL)
+					continue;  /* jumps to the loop test, which ends the line */
+				else
+				{
+					--CommentLevel;
+					cp += 2;
+				}
+			}
+			foundTag = findNextIdentifier (&cp);
+			if (foundTag != K_NONE)
+			{
+				cp = skipSpace (cp);
+				cp = parseIdentifier (cp, identifier);
+				if (foundTag == K_AND)
+				{
+					/* "and" with no preceding declaration has no kind to
+					 * inherit; K_NONE is not a valid SmlKinds index.
+					 */
+					if (lastTag != K_NONE)
+						makeSmlTag (lastTag, identifier);
+				}
+				else
+				{
+					makeSmlTag (foundTag, identifier);
+					lastTag = foundTag;
+				}
+			}
+			if (strstr ((const char *) cp, "(*") != NULL)
+			{
+				cp += 2;
+				cp = (const unsigned char *) strstr ((const char *) cp, "*)");
+				if (cp == NULL)
+					++CommentLevel;
+			}
+		} while (cp != NULL && strcmp ((const char *) cp, "") != 0);
+	}
+	vStringDelete (identifier);
+}
+
+extern parserDefinition *SmlParser (void)
+{
+	static const char *const extensions[] = { "sml", "sig", NULL };
+	parserDefinition *def = parserNew ("SML");
+	def->kinds = SmlKinds;
+	def->kindCount = KIND_COUNT (SmlKinds);
+	def->extensions = extensions;
+	def->parser = findSmlTags;
+	return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/sort.c b/sort.c
new file mode 100644
index 0000000..09ba87a
--- /dev/null
+++ b/sort.c
@@ -0,0 +1,230 @@
+/*
+*   $Id: sort.c 498 2007-02-17 22:43:15Z dhiebert $
+*
+*   Copyright (c) 1996-2002, Darren Hiebert
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions to sort the tag entries.
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#if defined (HAVE_STDLIB_H) +# include /* to declare malloc () */ +#endif +#include +#include + +#include "debug.h" +#include "entry.h" +#include "options.h" +#include "read.h" +#include "routines.h" +#include "sort.h" + +/* +* FUNCTION DEFINITIONS +*/ + +extern void catFile (const char *const name) +{ + FILE *const fp = fopen (name, "r"); + + if (fp != NULL) + { + int c; + while ((c = getc (fp)) != EOF) + putchar (c); + fflush (stdout); + fclose (fp); + } +} + +#ifdef EXTERNAL_SORT + +#ifdef NON_CONST_PUTENV_PROTOTYPE +# define PE_CONST +#else +# define PE_CONST const +#endif + +extern void externalSortTags (const boolean toStdout) +{ + const char *const sortNormalCommand = "sort -u -o"; + const char *const sortFoldedCommand = "sort -u -f -o"; + const char *sortCommand = + Option.sorted == SO_FOLDSORTED ? sortFoldedCommand : sortNormalCommand; + PE_CONST char *const sortOrder1 = "LC_COLLATE=C"; + PE_CONST char *const sortOrder2 = "LC_ALL=C"; + const size_t length = 4 + strlen (sortOrder1) + strlen (sortOrder2) + + strlen (sortCommand) + (2 * strlen (tagFileName ())); + char *const cmd = (char *) malloc (length + 1); + int ret = -1; + + if (cmd != NULL) + { + /* Ensure ASCII value sort order. 
+ */ +#ifdef HAVE_SETENV + setenv ("LC_COLLATE", "C", 1); + setenv ("LC_ALL", "C", 1); + sprintf (cmd, "%s %s %s", sortCommand, tagFileName (), tagFileName ()); +#else +# ifdef HAVE_PUTENV + putenv (sortOrder1); + putenv (sortOrder2); + sprintf (cmd, "%s %s %s", sortCommand, tagFileName (), tagFileName ()); +# else + sprintf (cmd, "%s %s %s %s %s", sortOrder1, sortOrder2, sortCommand, + tagFileName (), tagFileName ()); +# endif +#endif + verbose ("system (\"%s\")\n", cmd); + ret = system (cmd); + free (cmd); + + } + if (ret != 0) + error (FATAL | PERROR, "cannot sort tag file"); + else if (toStdout) + catFile (tagFileName ()); +} + +#else + +/* + * These functions provide a basic internal sort. No great memory + * optimization is performed (e.g. recursive subdivided sorts), + * so have lots of memory if you have large tag files. + */ + +static void failedSort (FILE *const fp, const char* msg) +{ + const char* const cannotSort = "cannot sort tag file"; + if (fp != NULL) + fclose (fp); + if (msg == NULL) + error (FATAL | PERROR, cannotSort); + else + error (FATAL, "%s: %s", msg, cannotSort); +} + +static int compareTagsFolded(const void *const one, const void *const two) +{ + const char *const line1 = *(const char* const*) one; + const char *const line2 = *(const char* const*) two; + + return struppercmp (line1, line2); +} + +static int compareTags (const void *const one, const void *const two) +{ + const char *const line1 = *(const char* const*) one; + const char *const line2 = *(const char* const*) two; + + return strcmp (line1, line2); +} + +static void writeSortedTags ( + char **const table, const size_t numTags, const boolean toStdout) +{ + FILE *fp; + size_t i; + + /* Write the sorted lines back into the tag file. 
+ */ + if (toStdout) + fp = stdout; + else + { + fp = fopen (tagFileName (), "w"); + if (fp == NULL) + failedSort (fp, NULL); + } + for (i = 0 ; i < numTags ; ++i) + { + /* Here we filter out identical tag *lines* (including search + * pattern) if this is not an xref file. + */ + if (i == 0 || Option.xref || strcmp (table [i], table [i-1]) != 0) + if (fputs (table [i], fp) == EOF) + failedSort (fp, NULL); + } + if (toStdout) + fflush (fp); + else + fclose (fp); +} + +extern void internalSortTags (const boolean toStdout) +{ + vString *vLine = vStringNew (); + FILE *fp = NULL; + const char *line; + size_t i; + int (*cmpFunc)(const void *, const void *); + + /* Allocate a table of line pointers to be sorted. + */ + size_t numTags = TagFile.numTags.added + TagFile.numTags.prev; + const size_t tableSize = numTags * sizeof (char *); + char **const table = (char **) malloc (tableSize); /* line pointers */ + DebugStatement ( size_t mallocSize = tableSize; ) /* cumulative total */ + + + cmpFunc = Option.sorted == SO_FOLDSORTED ? compareTagsFolded : compareTags; + if (table == NULL) + failedSort (fp, "out of memory"); + + /* Open the tag file and place its lines into allocated buffers. + */ + fp = fopen (tagFileName (), "r"); + if (fp == NULL) + failedSort (fp, NULL); + for (i = 0 ; i < numTags && ! feof (fp) ; ) + { + line = readLine (vLine, fp); + if (line == NULL) + { + if (! feof (fp)) + failedSort (fp, NULL); + break; + } + else if (*line == '\0' || strcmp (line, "\n") == 0) + ; /* ignore blank lines */ + else + { + const size_t stringSize = strlen (line) + 1; + + table [i] = (char *) malloc (stringSize); + if (table [i] == NULL) + failedSort (fp, "out of memory"); + DebugStatement ( mallocSize += stringSize; ) + strcpy (table [i], line); + ++i; + } + } + numTags = i; + fclose (fp); + vStringDelete (vLine); + + /* Sort the lines. 
+ */ + qsort (table, numTags, sizeof (*table), cmpFunc); + + writeSortedTags (table, numTags, toStdout); + + PrintStatus (("sort memory: %ld bytes\n", (long) mallocSize)); + for (i = 0 ; i < numTags ; ++i) + free (table [i]); + free (table); +} + +#endif + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/sort.h b/sort.h new file mode 100644 index 0000000..83d3273 --- /dev/null +++ b/sort.h @@ -0,0 +1,32 @@ +/* +* $Id: sort.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1998-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* External interface to sort.c +*/ +#ifndef _SORT_H +#define _SORT_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +/* +* FUNCTION PROTOTYPES +*/ +extern void catFile (const char *const name); + +#ifdef EXTERNAL_SORT +extern void externalSortTags (const boolean toStdout); +#else +extern void internalSortTags (const boolean toStdout); +#endif + +#endif /* _SORT_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/source.mak b/source.mak new file mode 100644 index 0000000..3e5f740 --- /dev/null +++ b/source.mak @@ -0,0 +1,122 @@ +# $Id: source.mak 720 2009-07-07 03:55:23Z dhiebert $ +# +# Shared macros + +HEADERS = \ + args.h ctags.h debug.h entry.h general.h get.h keyword.h \ + main.h options.h parse.h parsers.h read.h routines.h sort.h \ + strlist.h vstring.h + +SOURCES = \ + args.c \ + ant.c \ + asm.c \ + asp.c \ + awk.c \ + basic.c \ + beta.c \ + c.c \ + cobol.c \ + dosbatch.c \ + eiffel.c \ + entry.c \ + erlang.c \ + flex.c \ + fortran.c \ + get.c \ + html.c \ + jscript.c \ + keyword.c \ + lisp.c \ + lregex.c \ + lua.c \ + main.c \ + make.c \ + matlab.c \ + ocaml.c \ + options.c \ + parse.c \ + pascal.c \ + perl.c \ + php.c \ + python.c \ + read.c \ + rexx.c \ + routines.c \ + ruby.c \ + scheme.c \ + sh.c \ + slang.c \ + sml.c \ + sort.c \ + sql.c \ + strlist.c \ + tcl.c \ + tex.c \ + verilog.c \ + vhdl.c \ + 
vim.c \ + yacc.c \ + vstring.c + +ENVIRONMENT_HEADERS = \ + e_amiga.h e_djgpp.h e_mac.h e_msoft.h e_os2.h e_qdos.h e_riscos.h e_vms.h + +ENVIRONMENT_SOURCES = \ + argproc.c mac.c qdos.c + +REGEX_SOURCES = gnu_regex/regex.c + +REGEX_HEADERS = gnu_regex/regex.h + +OBJECTS = \ + args.$(OBJEXT) \ + ant.$(OBJEXT) \ + asm.$(OBJEXT) \ + asp.$(OBJEXT) \ + awk.$(OBJEXT) \ + basic.$(OBJEXT) \ + beta.$(OBJEXT) \ + c.$(OBJEXT) \ + cobol.$(OBJEXT) \ + dosbatch.$(OBJEXT) \ + eiffel.$(OBJEXT) \ + entry.$(OBJEXT) \ + erlang.$(OBJEXT) \ + flex.$(OBJEXT) \ + fortran.$(OBJEXT) \ + get.$(OBJEXT) \ + html.$(OBJEXT) \ + jscript.$(OBJEXT) \ + keyword.$(OBJEXT) \ + lisp.$(OBJEXT) \ + lregex.$(OBJEXT) \ + lua.$(OBJEXT) \ + main.$(OBJEXT) \ + make.$(OBJEXT) \ + matlab.$(OBJEXT) \ + ocaml.$(OBJEXT) \ + options.$(OBJEXT) \ + parse.$(OBJEXT) \ + pascal.$(OBJEXT) \ + perl.$(OBJEXT) \ + php.$(OBJEXT) \ + python.$(OBJEXT) \ + read.$(OBJEXT) \ + rexx.$(OBJEXT) \ + routines.$(OBJEXT) \ + ruby.$(OBJEXT) \ + scheme.$(OBJEXT) \ + sh.$(OBJEXT) \ + slang.$(OBJEXT) \ + sml.$(OBJEXT) \ + sort.$(OBJEXT) \ + sql.$(OBJEXT) \ + strlist.$(OBJEXT) \ + tcl.$(OBJEXT) \ + tex.$(OBJEXT) \ + verilog.$(OBJEXT) \ + vhdl.$(OBJEXT) \ + vim.$(OBJEXT) \ + yacc.$(OBJEXT) \ + vstring.$(OBJEXT) diff --git a/sql.c b/sql.c new file mode 100644 index 0000000..efe7e5d --- /dev/null +++ b/sql.c @@ -0,0 +1,2112 @@ +/* + * $Id: sql.c 703 2009-03-14 22:06:12Z dfishburn $ + * + * Copyright (c) 2002-2003, Darren Hiebert + * + * This source code is released for free distribution under the terms of the + * GNU General Public License. + * + * This module contains functions for generating tags for PL/SQL language + * files. 
+ */ + +/* + * INCLUDE FILES + */ +#include "general.h" /* must always come first */ + +#include /* to define isalpha () */ +#include +#ifdef DEBUG +#include +#endif + +#include "debug.h" +#include "entry.h" +#include "keyword.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" + +/* + * On-line "Oracle Database PL/SQL Language Reference": + * http://download.oracle.com/docs/cd/B28359_01/appdev.111/b28370/toc.htm + * + * Sample PL/SQL code is available from: + * http://www.orafaq.com/faqscrpt.htm#GENPLSQL + * + * On-line SQL Anywhere Documentation + * http://www.ianywhere.com/developer/product_manuals/sqlanywhere/index.html + */ + +/* + * MACROS + */ +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* + * DATA DECLARATIONS + */ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +/* + * Used to specify type of keyword. + */ +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_is, + KEYWORD_begin, + KEYWORD_body, + KEYWORD_cursor, + KEYWORD_declare, + KEYWORD_end, + KEYWORD_function, + KEYWORD_if, + KEYWORD_loop, + KEYWORD_case, + KEYWORD_for, + KEYWORD_call, + KEYWORD_package, + KEYWORD_pragma, + KEYWORD_procedure, + KEYWORD_record, + KEYWORD_object, + KEYWORD_ref, + KEYWORD_rem, + KEYWORD_return, + KEYWORD_returns, + KEYWORD_subtype, + KEYWORD_table, + KEYWORD_trigger, + KEYWORD_type, + KEYWORD_index, + KEYWORD_event, + KEYWORD_publication, + KEYWORD_service, + KEYWORD_domain, + KEYWORD_datatype, + KEYWORD_result, + KEYWORD_url, + KEYWORD_internal, + KEYWORD_external, + KEYWORD_when, + KEYWORD_then, + KEYWORD_variable, + KEYWORD_exception, + KEYWORD_at, + KEYWORD_on, + KEYWORD_primary, + KEYWORD_references, + KEYWORD_unique, + KEYWORD_check, + KEYWORD_constraint, + KEYWORD_foreign, + KEYWORD_ml_table, + KEYWORD_ml_table_lang, + KEYWORD_ml_table_dnet, + KEYWORD_ml_table_java, + KEYWORD_ml_table_chk, + KEYWORD_ml_conn, + 
KEYWORD_ml_conn_lang, + KEYWORD_ml_conn_dnet, + KEYWORD_ml_conn_java, + KEYWORD_ml_conn_chk, + KEYWORD_local, + KEYWORD_temporary, + KEYWORD_drop, + KEYWORD_view, + KEYWORD_synonym, + KEYWORD_handler, + KEYWORD_comment, + KEYWORD_create, + KEYWORD_go +} keywordId; + +/* + * Used to determine whether keyword is valid for the token language and + * what its ID is. + */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_UNDEFINED, + TOKEN_BLOCK_LABEL_BEGIN, + TOKEN_BLOCK_LABEL_END, + TOKEN_CHARACTER, + TOKEN_CLOSE_PAREN, + TOKEN_SEMICOLON, + TOKEN_COMMA, + TOKEN_IDENTIFIER, + TOKEN_KEYWORD, + TOKEN_OPEN_PAREN, + TOKEN_OPERATOR, + TOKEN_OTHER, + TOKEN_STRING, + TOKEN_PERIOD, + TOKEN_OPEN_CURLY, + TOKEN_CLOSE_CURLY, + TOKEN_OPEN_SQUARE, + TOKEN_CLOSE_SQUARE, + TOKEN_TILDE, + TOKEN_FORWARD_SLASH +} tokenType; + +typedef struct sTokenInfoSQL { + tokenType type; + keywordId keyword; + vString * string; + vString * scope; + int begin_end_nest_lvl; + unsigned long lineNumber; + fpos_t filePosition; +} tokenInfo; + +/* + * DATA DEFINITIONS + */ + +static langType Lang_sql; + +static jmp_buf Exception; + +typedef enum { + SQLTAG_CURSOR, + SQLTAG_PROTOTYPE, + SQLTAG_FUNCTION, + SQLTAG_FIELD, + SQLTAG_LOCAL_VARIABLE, + SQLTAG_BLOCK_LABEL, + SQLTAG_PACKAGE, + SQLTAG_PROCEDURE, + SQLTAG_RECORD, + SQLTAG_SUBTYPE, + SQLTAG_TABLE, + SQLTAG_TRIGGER, + SQLTAG_VARIABLE, + SQLTAG_INDEX, + SQLTAG_EVENT, + SQLTAG_PUBLICATION, + SQLTAG_SERVICE, + SQLTAG_DOMAIN, + SQLTAG_VIEW, + SQLTAG_SYNONYM, + SQLTAG_MLTABLE, + SQLTAG_MLCONN, + SQLTAG_COUNT +} sqlKind; + +static kindOption SqlKinds [] = { + { TRUE, 'c', "cursor", "cursors" }, + { FALSE, 'd', "prototype", "prototypes" }, + { TRUE, 'f', "function", "functions" }, + { TRUE, 'F', "field", "record fields" }, + { FALSE, 'l', "local", "local variables" }, + { TRUE, 'L', "label", "block label" }, + { TRUE, 'P', "package", "packages" }, + { TRUE, 'p', "procedure", "procedures" }, 
+ { FALSE, 'r', "record", "records" }, + { TRUE, 's', "subtype", "subtypes" }, + { TRUE, 't', "table", "tables" }, + { TRUE, 'T', "trigger", "triggers" }, + { TRUE, 'v', "variable", "variables" }, + { TRUE, 'i', "index", "indexes" }, + { TRUE, 'e', "event", "events" }, + { TRUE, 'U', "publication", "publications" }, + { TRUE, 'R', "service", "services" }, + { TRUE, 'D', "domain", "domains" }, + { TRUE, 'V', "view", "views" }, + { TRUE, 'n', "synonym", "synonyms" }, + { TRUE, 'x', "mltable", "MobiLink Table Scripts" }, + { TRUE, 'y', "mlconn", "MobiLink Conn Scripts" } +}; + +static const keywordDesc SqlKeywordTable [] = { + /* keyword keyword ID */ + { "as", KEYWORD_is }, + { "is", KEYWORD_is }, + { "begin", KEYWORD_begin }, + { "body", KEYWORD_body }, + { "cursor", KEYWORD_cursor }, + { "declare", KEYWORD_declare }, + { "end", KEYWORD_end }, + { "function", KEYWORD_function }, + { "if", KEYWORD_if }, + { "loop", KEYWORD_loop }, + { "case", KEYWORD_case }, + { "for", KEYWORD_for }, + { "call", KEYWORD_call }, + { "package", KEYWORD_package }, + { "pragma", KEYWORD_pragma }, + { "procedure", KEYWORD_procedure }, + { "record", KEYWORD_record }, + { "object", KEYWORD_object }, + { "ref", KEYWORD_ref }, + { "rem", KEYWORD_rem }, + { "return", KEYWORD_return }, + { "returns", KEYWORD_returns }, + { "subtype", KEYWORD_subtype }, + { "table", KEYWORD_table }, + { "trigger", KEYWORD_trigger }, + { "type", KEYWORD_type }, + { "index", KEYWORD_index }, + { "event", KEYWORD_event }, + { "publication", KEYWORD_publication }, + { "service", KEYWORD_service }, + { "domain", KEYWORD_domain }, + { "datatype", KEYWORD_datatype }, + { "result", KEYWORD_result }, + { "url", KEYWORD_url }, + { "internal", KEYWORD_internal }, + { "external", KEYWORD_external }, + { "when", KEYWORD_when }, + { "then", KEYWORD_then }, + { "variable", KEYWORD_variable }, + { "exception", KEYWORD_exception }, + { "at", KEYWORD_at }, + { "on", KEYWORD_on }, + { "primary", KEYWORD_primary }, + { 
"references", KEYWORD_references }, + { "unique", KEYWORD_unique }, + { "check", KEYWORD_check }, + { "constraint", KEYWORD_constraint }, + { "foreign", KEYWORD_foreign }, + { "ml_add_table_script", KEYWORD_ml_table }, + { "ml_add_lang_table_script", KEYWORD_ml_table_lang }, + { "ml_add_dnet_table_script", KEYWORD_ml_table_dnet }, + { "ml_add_java_table_script", KEYWORD_ml_table_java }, + { "ml_add_lang_table_script_chk", KEYWORD_ml_table_chk }, + { "ml_add_connection_script", KEYWORD_ml_conn }, + { "ml_add_lang_connection_script", KEYWORD_ml_conn_lang }, + { "ml_add_dnet_connection_script", KEYWORD_ml_conn_dnet }, + { "ml_add_java_connection_script", KEYWORD_ml_conn_java }, + { "ml_add_lang_conn_script_chk", KEYWORD_ml_conn_chk }, + { "local", KEYWORD_local }, + { "temporary", KEYWORD_temporary }, + { "drop", KEYWORD_drop }, + { "view", KEYWORD_view }, + { "synonym", KEYWORD_synonym }, + { "handler", KEYWORD_handler }, + { "comment", KEYWORD_comment }, + { "create", KEYWORD_create }, + { "go", KEYWORD_go } +}; + +/* + * FUNCTION DECLARATIONS + */ + +/* Recursive calls */ +static void parseBlock (tokenInfo *const token, const boolean local); +static void parseKeywords (tokenInfo *const token); +static void parseSqlFile (tokenInfo *const token); + +/* + * FUNCTION DEFINITIONS + */ + +static boolean isIdentChar1 (const int c) +{ + /* + * Other databases are less restrictive on the first character of + * an identifier. + * isIdentChar1 is used to identify the first character of an + * identifier, so we are removing some restrictions. 
+ */ + return (boolean) + (isalpha (c) || c == '@' || c == '_' ); +} + +static boolean isIdentChar (const int c) +{ + return (boolean) + (isalpha (c) || isdigit (c) || c == '$' || + c == '@' || c == '_' || c == '#'); +} + +static boolean isCmdTerm (tokenInfo *const token) +{ + DebugStatement ( + debugPrintf (DEBUG_PARSE + , "\n isCmdTerm: token same tt:%d tk:%d\n" + , token->type + , token->keyword + ); + ); + + /* + * Based on the various customer sites I have been at + * the most common command delimiters are + * ; + * ~ + * / + * go + * This routine will check for any of these, more + * can easily be added by modifying readToken and + * either adding the character to: + * enum eTokenType + * enum eTokenType + */ + return ( isType (token, TOKEN_SEMICOLON) || + isType (token, TOKEN_TILDE) || + isType (token, TOKEN_FORWARD_SLASH) || + isKeyword (token, KEYWORD_go) + ); +} + +static boolean isMatchedEnd(tokenInfo *const token, int nest_lvl) +{ + boolean terminated = FALSE; + /* + * Since different forms of SQL allow the use of + * BEGIN + * ... + * END + * blocks, some statements may not be terminated using + * the standard delimiters: + * ; + * ~ + * / + * go + * This routine will check to see if we encounter and END + * for the matching nest level of BEGIN ... END statements. + * If we find one, then we can assume, the statement was terminated + * since we have fallen through to the END statement of the BEGIN + * block. 
+ */ + if ( nest_lvl > 0 && isKeyword (token, KEYWORD_end) ) + { + if ( token->begin_end_nest_lvl == nest_lvl ) + terminated = TRUE; + } + + return terminated; +} + +static void buildSqlKeywordHash (void) +{ + const size_t count = sizeof (SqlKeywordTable) / + sizeof (SqlKeywordTable [0]); + size_t i; + for (i = 0 ; i < count ; ++i) + { + const keywordDesc* const p = &SqlKeywordTable [i]; + addKeyword (p->name, Lang_sql, (int) p->id); + } +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->scope = vStringNew (); + token->begin_end_nest_lvl = 0; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + + return token; +} + +static void deleteToken (tokenInfo *const token) +{ + vStringDelete (token->string); + vStringDelete (token->scope); + eFree (token); +} + +/* + * Tag generation functions + */ + +static void makeConstTag (tokenInfo *const token, const sqlKind kind) +{ + if (SqlKinds [kind].enabled) + { + const char *const name = vStringValue (token->string); + tagEntryInfo e; + initTagEntry (&e, name); + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.kindName = SqlKinds [kind].name; + e.kind = SqlKinds [kind].letter; + + makeTagEntry (&e); + } +} + +static void makeSqlTag (tokenInfo *const token, const sqlKind kind) +{ + vString * fulltag; + + if (SqlKinds [kind].enabled) + { + /* + * If a scope has been added to the token, change the token + * string to include the scope when making the tag. 
+ */ + if ( vStringLength(token->scope) > 0 ) + { + fulltag = vStringNew (); + vStringCopy(fulltag, token->scope); + vStringCatS (fulltag, "."); + vStringCatS (fulltag, vStringValue(token->string)); + vStringTerminate(fulltag); + vStringCopy(token->string, fulltag); + vStringDelete (fulltag); + } + makeConstTag (token, kind); + } +} + +/* + * Parsing functions + */ + +static void parseString (vString *const string, const int delimiter) +{ + boolean end = FALSE; + while (! end) + { + int c = fileGetc (); + if (c == EOF) + end = TRUE; + /* + else if (c == '\\') + { + c = fileGetc(); // This maybe a ' or ". // + vStringPut(string, c); + } + */ + else if (c == delimiter) + end = TRUE; + else + vStringPut (string, c); + } + vStringTerminate (string); +} + +/* Read a C identifier beginning with "firstChar" and places it into "name". +*/ +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + Assert (isIdentChar1 (c)); + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isIdentChar (c)); + vStringTerminate (string); + if (!isspace (c)) + fileUngetc (c); /* unget non-identifier character */ +} + +static void readToken (tokenInfo *const token) +{ + int c; + + token->type = TOKEN_UNDEFINED; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + do + { + c = fileGetc (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + /* + * Added " to the list of ignores, not sure what this + * might break but it gets by this issue: + * create table "t1" (...) + * + * Darren, the code passes all my tests for both + * Oracle and SQL Anywhere, but maybe you can tell me + * what this may effect. 
+ */ + } + while (c == '\t' || c == ' ' || c == '\n'); + + switch (c) + { + case EOF: longjmp (Exception, (int)ExceptionEOF); break; + case '(': token->type = TOKEN_OPEN_PAREN; break; + case ')': token->type = TOKEN_CLOSE_PAREN; break; + case ';': token->type = TOKEN_SEMICOLON; break; + case '.': token->type = TOKEN_PERIOD; break; + case ',': token->type = TOKEN_COMMA; break; + case '{': token->type = TOKEN_OPEN_CURLY; break; + case '}': token->type = TOKEN_CLOSE_CURLY; break; + case '~': token->type = TOKEN_TILDE; break; + case '[': token->type = TOKEN_OPEN_SQUARE; break; + case ']': token->type = TOKEN_CLOSE_SQUARE; break; + + case '\'': + case '"': + token->type = TOKEN_STRING; + parseString (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '-': + c = fileGetc (); + if (c == '-') /* -- is this the start of a comment? */ + { + fileSkipToCharacter ('\n'); + goto getNextChar; + } + else + { + if (!isspace (c)) + fileUngetc (c); + token->type = TOKEN_OPERATOR; + } + break; + + case '<': + case '>': + { + const int initial = c; + int d = fileGetc (); + if (d == initial) + { + if (initial == '<') + token->type = TOKEN_BLOCK_LABEL_BEGIN; + else + token->type = TOKEN_BLOCK_LABEL_END; + } + else + { + fileUngetc (d); + token->type = TOKEN_UNDEFINED; + } + break; + } + + case '\\': + c = fileGetc (); + if (c != '\\' && c != '"' && c != '\'' && !isspace (c)) + fileUngetc (c); + token->type = TOKEN_CHARACTER; + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '/': + { + int d = fileGetc (); + if ( (d != '*') && /* is this the start of a comment? */ + (d != '/') ) /* is a one line comment? 
*/ + { + token->type = TOKEN_FORWARD_SLASH; + fileUngetc (d); + } + else + { + if (d == '*') + { + do + { + fileSkipToCharacter ('*'); + c = fileGetc (); + if (c == '/') + break; + else + fileUngetc (c); + } while (c != EOF && c != '\0'); + goto getNextChar; + } + else if (d == '/') /* is this the start of a comment? */ + { + fileSkipToCharacter ('\n'); + goto getNextChar; + } + } + break; + } + + default: + if (! isIdentChar1 (c)) + token->type = TOKEN_UNDEFINED; + else + { + parseIdentifier (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + token->keyword = analyzeToken (token->string, Lang_sql); + if (isKeyword (token, KEYWORD_rem)) + { + vStringClear (token->string); + fileSkipToCharacter ('\n'); + goto getNextChar; + } + else if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + } + break; + } +} + +/* + * Token parsing functions + */ + +/* + * static void addContext (tokenInfo* const parent, const tokenInfo* const child) + * { + * if (vStringLength (parent->string) > 0) + * { + * vStringCatS (parent->string, "."); + * } + * vStringCatS (parent->string, vStringValue(child->string)); + * vStringTerminate(parent->string); + * } + */ + +static void addToScope (tokenInfo* const token, vString* const extra) +{ + if (vStringLength (token->scope) > 0) + { + vStringCatS (token->scope, "."); + } + vStringCatS (token->scope, vStringValue(extra)); + vStringTerminate(token->scope); +} + +/* + * Scanning functions + */ + +static void findToken (tokenInfo *const token, const tokenType type) +{ + while (! 
isType (token, type)) + { + readToken (token); + } +} + +static void findCmdTerm (tokenInfo *const token, const boolean check_first) +{ + int begin_end_nest_lvl = token->begin_end_nest_lvl; + + if ( check_first ) + { + if ( isCmdTerm(token) ) + return; + } + do + { + readToken (token); + } while ( !isCmdTerm(token) && !isMatchedEnd(token, begin_end_nest_lvl) ); +} + +static void skipToMatched(tokenInfo *const token) +{ + int nest_level = 0; + tokenType open_token; + tokenType close_token; + + switch (token->type) + { + case TOKEN_OPEN_PAREN: + open_token = TOKEN_OPEN_PAREN; + close_token = TOKEN_CLOSE_PAREN; + break; + case TOKEN_OPEN_CURLY: + open_token = TOKEN_OPEN_CURLY; + close_token = TOKEN_CLOSE_CURLY; + break; + case TOKEN_OPEN_SQUARE: + open_token = TOKEN_OPEN_SQUARE; + close_token = TOKEN_CLOSE_SQUARE; + break; + default: + return; + } + + /* + * This routine will skip to a matching closing token. + * It will also handle nested tokens like the (, ) below. + * ( name varchar(30), text binary(10) ) + */ + + if (isType (token, open_token)) + { + nest_level++; + while (! (isType (token, close_token) && (nest_level == 0))) + { + readToken (token); + if (isType (token, open_token)) + { + nest_level++; + } + if (isType (token, close_token)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void skipArgumentList (tokenInfo *const token) +{ + /* + * Other databases can have arguments with fully declared + * datatypes: + * ( name varchar(30), text binary(10) ) + * So we must check for nested open and closing parantheses + */ + + if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */ + { + skipToMatched (token); + } +} + +static void parseSubProgram (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This must handle both prototypes and the body of + * the procedures. 
+ * + * Prototype: + * FUNCTION func_name RETURN integer; + * PROCEDURE proc_name( parameters ); + * Procedure + * FUNCTION GET_ML_USERNAME RETURN VARCHAR2 + * IS + * BEGIN + * RETURN v_sync_user_id; + * END GET_ML_USERNAME; + * + * PROCEDURE proc_name( parameters ) + * IS + * BEGIN + * END; + * CREATE PROCEDURE proc_name( parameters ) + * EXTERNAL NAME ... ; + * CREATE PROCEDURE proc_name( parameters ) + * BEGIN + * END; + * + * CREATE FUNCTION f_GetClassName( + * IN @object VARCHAR(128) + * ,IN @code VARCHAR(128) + * ) + * RETURNS VARCHAR(200) + * DETERMINISTIC + * BEGIN + * + * IF( @object = 'user_state' ) THEN + * SET something = something; + * END IF; + * + * RETURN @name; + * END; + */ + const sqlKind kind = isKeyword (token, KEYWORD_function) ? + SQLTAG_FUNCTION : SQLTAG_PROCEDURE; + Assert (isKeyword (token, KEYWORD_function) || + isKeyword (token, KEYWORD_procedure)); + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + readToken (token); + } + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* Reads to the next token after the TOKEN_CLOSE_PAREN */ + skipArgumentList(token); + } + + if (kind == SQLTAG_FUNCTION) + { + if (isKeyword (token, KEYWORD_return) || isKeyword (token, KEYWORD_returns)) + { + /* Read datatype */ + readToken (token); + /* + * Read token after which could be the + * command terminator if a prototype + * or an open parantheses + */ + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* Reads to the next token after the TOKEN_CLOSE_PAREN */ + skipArgumentList(token); + } + } + } + if( isCmdTerm (token) ) + { + makeSqlTag (name, SQLTAG_PROTOTYPE); + } + else + { + while (!(isKeyword (token, KEYWORD_is) || + isKeyword (token, KEYWORD_begin) || + isKeyword (token, KEYWORD_at) || + isKeyword (token, KEYWORD_internal) || + isKeyword (token, KEYWORD_external) || + isKeyword (token, KEYWORD_url) || + isCmdTerm (token) + ) + ) + { + if ( isKeyword (token, KEYWORD_result) ) + { + 
readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* Reads to the next token after the TOKEN_CLOSE_PAREN */ + skipArgumentList(token); + } + } else { + readToken (token); + } + } + if (isKeyword (token, KEYWORD_at) || + isKeyword (token, KEYWORD_url) || + isKeyword (token, KEYWORD_internal) || + isKeyword (token, KEYWORD_external) ) + { + addToScope(token, name->string); + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING) || + !isKeyword (token, KEYWORD_NONE) + ) + makeSqlTag (name, kind); + + vStringClear (token->scope); + } + if (isKeyword (token, KEYWORD_is) || + isKeyword (token, KEYWORD_begin) ) + { + addToScope(token, name->string); + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING) || + !isKeyword (token, KEYWORD_NONE) + ) + makeSqlTag (name, kind); + + parseBlock (token, TRUE); + vStringClear (token->scope); + } + } + deleteToken (name); +} + +static void parseRecord (tokenInfo *const token) +{ + /* + * Make it a bit forgiving, this is called from + * multiple functions, parseTable, parseType + */ + if (!isType (token, TOKEN_OPEN_PAREN)) + readToken (token); + + Assert (isType (token, TOKEN_OPEN_PAREN)); + do + { + if ( isType (token, TOKEN_COMMA) || isType (token, TOKEN_OPEN_PAREN) ) + readToken (token); + + /* + * Create table statements can end with various constraints + * which must be excluded from the SQLTAG_FIELD. + * create table t1 ( + * c1 integer, + * c2 char(30), + * c3 numeric(10,5), + * c4 integer, + * constraint whatever, + * primary key(c1), + * foreign key (), + * check () + * ) + */ + if (! 
(isKeyword(token, KEYWORD_primary) || + isKeyword(token, KEYWORD_references) || + isKeyword(token, KEYWORD_unique) || + isKeyword(token, KEYWORD_check) || + isKeyword(token, KEYWORD_constraint) || + isKeyword(token, KEYWORD_foreign) ) ) + { + if (isType (token, TOKEN_IDENTIFIER) || + isType (token, TOKEN_STRING)) + makeSqlTag (token, SQLTAG_FIELD); + } + + while (!(isType (token, TOKEN_COMMA) || + isType (token, TOKEN_CLOSE_PAREN) || + isType (token, TOKEN_OPEN_PAREN) + )) + { + readToken (token); + /* + * A table structure can look like this: + * create table t1 ( + * c1 integer, + * c2 char(30), + * c3 numeric(10,5), + * c4 integer + * ) + * We can't just look for a COMMA or CLOSE_PAREN + * since that will not deal with the numeric(10,5) + * case. So we need to skip the argument list + * when we find an open paren. + */ + if (isType (token, TOKEN_OPEN_PAREN)) + { + /* Reads to the next token after the TOKEN_CLOSE_PAREN */ + skipArgumentList(token); + } + } + } while (! isType (token, TOKEN_CLOSE_PAREN)); +} + +static void parseType (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + vString * saveScope = vStringNew (); + + vStringCopy(saveScope, token->scope); + /* If a scope has been set, add it to the name */ + addToScope (name, token->scope); + readToken (name); + if (isType (name, TOKEN_IDENTIFIER)) + { + readToken (token); + if (isKeyword (token, KEYWORD_is)) + { + readToken (token); + addToScope (token, name->string); + switch (token->keyword) + { + case KEYWORD_record: + case KEYWORD_object: + makeSqlTag (name, SQLTAG_RECORD); + parseRecord (token); + break; + + case KEYWORD_table: + makeSqlTag (name, SQLTAG_TABLE); + break; + + case KEYWORD_ref: + readToken (token); + if (isKeyword (token, KEYWORD_cursor)) + makeSqlTag (name, SQLTAG_CURSOR); + break; + + default: break; + } + vStringClear (token->scope); + } + } + vStringCopy(token->scope, saveScope); + deleteToken (name); + vStringDelete(saveScope); +} + +static void parseSimple 
(tokenInfo *const token, const sqlKind kind) +{ + /* This will simply make the tagname from the first word found */ + readToken (token); + if (isType (token, TOKEN_IDENTIFIER) || + isType (token, TOKEN_STRING)) + makeSqlTag (token, kind); +} + +static void parseDeclare (tokenInfo *const token, const boolean local) +{ + /* + * PL/SQL declares are of this format: + * IS|AS + * [declare] + * CURSOR curname ... + * varname1 datatype; + * varname2 datatype; + * varname3 datatype; + * begin + */ + + if (isKeyword (token, KEYWORD_declare)) + readToken (token); + while (! isKeyword (token, KEYWORD_begin) && ! isKeyword (token, KEYWORD_end)) + { + switch (token->keyword) + { + case KEYWORD_cursor: parseSimple (token, SQLTAG_CURSOR); break; + case KEYWORD_function: parseSubProgram (token); break; + case KEYWORD_procedure: parseSubProgram (token); break; + case KEYWORD_subtype: parseSimple (token, SQLTAG_SUBTYPE); break; + case KEYWORD_trigger: parseSimple (token, SQLTAG_TRIGGER); break; + case KEYWORD_type: parseType (token); break; + + default: + if (isType (token, TOKEN_IDENTIFIER)) + { + if (local) + { + makeSqlTag (token, SQLTAG_LOCAL_VARIABLE); + } + else + { + makeSqlTag (token, SQLTAG_VARIABLE); + } + } + break; + } + findToken (token, TOKEN_SEMICOLON); + readToken (token); + } +} + +static void parseDeclareANSI (tokenInfo *const token, const boolean local) +{ + tokenInfo *const type = newToken (); + /* + * ANSI declares are of this format: + * BEGIN + * DECLARE varname1 datatype; + * DECLARE varname2 datatype; + * ... + * + * This differ from PL/SQL where DECLARE preceeds the BEGIN block + * and the DECLARE keyword is not repeated. 
+ */ + while (isKeyword (token, KEYWORD_declare)) + { + readToken (token); + readToken (type); + + if (isKeyword (type, KEYWORD_cursor)) + makeSqlTag (token, SQLTAG_CURSOR); + else if (isKeyword (token, KEYWORD_local) && + isKeyword (type, KEYWORD_temporary)) + { + /* + * DECLARE LOCAL TEMPORARY TABLE table_name ( + * c1 int, + * c2 int + * ); + */ + readToken (token); + if (isKeyword (token, KEYWORD_table)) + { + readToken (token); + if (isType(token, TOKEN_IDENTIFIER) || + isType(token, TOKEN_STRING) ) + { + makeSqlTag (token, SQLTAG_TABLE); + } + } + } + else if (isType (token, TOKEN_IDENTIFIER) || + isType (token, TOKEN_STRING)) + { + if (local) + makeSqlTag (token, SQLTAG_LOCAL_VARIABLE); + else + makeSqlTag (token, SQLTAG_VARIABLE); + } + findToken (token, TOKEN_SEMICOLON); + readToken (token); + } + deleteToken (type); +} + +static void parseLabel (tokenInfo *const token) +{ + /* + * A label has this format (the label names below are placeholders): + * <<outer_label>> + * DECLARE + * v_senator VARCHAR2(100) := 'THURMOND, JESSE'; + * BEGIN + * IF total_contributions (v_senator, 'TOBACCO') > 25000 + * THEN + * <<inner_label>> + * DECLARE + * v_senator VARCHAR2(100) := 'WHATEVERIT, TAKES'; + * BEGIN + * ... 
+ */ + + Assert (isType (token, TOKEN_BLOCK_LABEL_BEGIN)); + readToken (token); + if (isType (token, TOKEN_IDENTIFIER)) + { + makeSqlTag (token, SQLTAG_BLOCK_LABEL); + readToken (token); /* read end of label */ + } +} + +static void parseStatements (tokenInfo *const token) +{ + boolean isAnsi = TRUE; + boolean stmtTerm = FALSE; + do + { + if (isType (token, TOKEN_BLOCK_LABEL_BEGIN)) + parseLabel (token); + else + { + switch (token->keyword) + { + case KEYWORD_exception: + /* + * EXCEPTION + * ; + * + * Where an exception handler could be: + * BEGIN + * WHEN OTHERS THEN + * x := x + 3; + * END; + * In this case we need to skip this keyword and + * move on to the next token without reading until + * TOKEN_SEMICOLON; + */ + readToken (token); + continue; + + case KEYWORD_when: + /* + * WHEN statements can be used in exception clauses + * and CASE statements. The CASE statement should skip + * these given below we skip over to an END statement. + * But for an exception clause, we can have: + * EXCEPTION + * WHEN OTHERS THEN + * BEGIN + * x := x + 3; + * END; + * If we skip to the TOKEN_SEMICOLON, we miss the begin + * of a nested BEGIN END block. So read the next token + * after the THEN and restart the LOOP. + */ + while (! isKeyword (token, KEYWORD_then)) + readToken (token); + readToken (token); + continue; + + case KEYWORD_if: + /* + * We do not want to look for a ; since for an empty + * IF block, it would skip over the END. + * IF...THEN + * END IF; + * + * or non-ANSI + * IF ... + * BEGIN + * END + */ + while ( ! isKeyword (token, KEYWORD_then) && + ! isKeyword (token, KEYWORD_begin) ) + { + readToken (token); + } + + if( isKeyword (token, KEYWORD_begin ) ) + { + isAnsi = FALSE; + parseBlock(token, FALSE); + + /* + * Handle the non-Ansi IF blocks. + * parseBlock consumes the END, so if the next + * token in a command terminator (like GO) + * we know we are done with this statement. 
+ */ + if ( isCmdTerm (token) ) + stmtTerm = TRUE; + } + else + { + readToken (token); + parseStatements (token); + /* + * parseStatements returns when it finds an END, an IF + * should follow the END for ANSI anyway. + * IF...THEN + * END IF; + */ + if( isKeyword (token, KEYWORD_end ) ) + readToken (token); + + if( ! isKeyword (token, KEYWORD_if ) ) + { + /* + * Well we need to do something here. + * There are lots of different END statements + * END; + * END CASE; + * ENDIF; + * ENDCASE; + */ + } + } + break; + + case KEYWORD_loop: + case KEYWORD_case: + case KEYWORD_for: + /* + * LOOP... + * END LOOP; + * + * CASE + * WHEN '1' THEN + * END CASE; + * + * FOR loop_name AS cursor_name CURSOR FOR ... + * END FOR; + */ + readToken (token); + parseStatements (token); + + if( isKeyword (token, KEYWORD_end ) ) + readToken (token); + + break; + + case KEYWORD_create: + readToken (token); + parseKeywords(token); + break; + + case KEYWORD_declare: + case KEYWORD_begin: + parseBlock (token, TRUE); + break; + + case KEYWORD_end: + break; + + default: + readToken (token); + break; + } + /* + * Not all statements must end in a semi-colon + * begin + * if current publisher <> 'publish' then + * signal UE_FailStatement + * end if + * end; + * The last statement prior to an end ("signal" above) does + * not need a semi-colon, nor does the end if, since it is + * also the last statement prior to the end of the block. + * + * So we must read to the first semi-colon or an END block + */ + while ( ! stmtTerm && + ! ( isKeyword (token, KEYWORD_end) || + (isCmdTerm(token)) ) + ) + { + readToken (token); + + if (isType (token, TOKEN_OPEN_PAREN) || + isType (token, TOKEN_OPEN_CURLY) || + isType (token, TOKEN_OPEN_SQUARE) ) + skipToMatched (token); + + } + } + /* + * We assumed earlier all statements ended with a command terminator. + * See comment above, now, only read if the current token + * is not a command terminator. 
+ */ + if ( isCmdTerm(token) ) + { + readToken (token); + } + } while (! isKeyword (token, KEYWORD_end) && ! stmtTerm ); +} + +static void parseBlock (tokenInfo *const token, const boolean local) +{ + if (isType (token, TOKEN_BLOCK_LABEL_BEGIN)) + { + parseLabel (token); + readToken (token); + } + if (! isKeyword (token, KEYWORD_begin)) + { + readToken (token); + /* + * These are Oracle style declares which generally come + * between an IS/AS and BEGIN block. + */ + parseDeclare (token, local); + } + if (isKeyword (token, KEYWORD_begin)) + { + readToken (token); + /* + * Check for ANSI declarations which always follow + * a BEGIN statement. This routine will not advance + * the token if none are found. + */ + parseDeclareANSI (token, local); + token->begin_end_nest_lvl++; + while (! isKeyword (token, KEYWORD_end)) + { + parseStatements (token); + } + token->begin_end_nest_lvl--; + + /* + * Read the next token (we will assume + * it is the command delimiter) + */ + readToken (token); + + /* + * Check if the END block is terminated + */ + if ( !isCmdTerm (token) ) + { + /* + * Not sure what to do here at the moment. + * I think the routine that calls parseBlock + * must expect the next token has already + * been read since it is possible this + * token is not a command delimiter. + */ + /* findCmdTerm (token, FALSE); */ + } + } +} + +static void parsePackage (tokenInfo *const token) +{ + /* + * Packages can be specified in a number of ways: + * CREATE OR REPLACE PACKAGE pkg_name AS + * or + * CREATE OR REPLACE PACKAGE owner.pkg_name AS + * or by specifying a package body + * CREATE OR REPLACE PACKAGE BODY pkg_name AS + * CREATE OR REPLACE PACKAGE BODY owner.pkg_name AS + */ + tokenInfo *const name = newToken (); + readToken (name); + if (isKeyword (name, KEYWORD_body)) + { + /* + * Ignore the BODY tag since we will process + * the body or prototypes in the same manner + */ + readToken (name); + } + /* Check for owner.pkg_name */ + while (! 
isKeyword (token, KEYWORD_is)) + { + readToken (token); + if ( isType(token, TOKEN_PERIOD) ) + { + readToken (name); + } + } + if (isKeyword (token, KEYWORD_is)) + { + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING)) + makeSqlTag (name, SQLTAG_PACKAGE); + parseBlock (token, FALSE); + } + findCmdTerm (token, FALSE); + deleteToken (name); +} +/* Tags the table named by a "create table" statement; the name may be qualified and/or bracketed as listed in the comment inside. */ +static void parseTable (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats: + * create table t1 (c1 int); + * create global temporary table t2 (c1 int); + * create table "t3" (c1 int); + * create table bob.t4 (c1 int); + * create table bob."t5" (c1 int); + * create table "bob"."t6" (c1 int); + * create table bob."t7" (c1 int); + * Proxy tables use this format: + * create existing table bob."t7" AT '...'; + * SQL Server and Sybase formats + * create table OnlyTable ( + * create table dbo.HasOwner ( + * create table [dbo].[HasOwnerSquare] ( + * create table master.dbo.HasDb ( + * create table master..HasDbNoOwner ( + * create table [master].dbo.[HasDbAndOwnerSquare] ( + * create table [master]..[HasDbNoOwnerSquare] ( + */ + + /* This could be a database, owner or table name */ + readToken (name); + if (isType (name, TOKEN_OPEN_SQUARE)) + { + readToken (name); + /* Read close square */ + readToken (token); + } + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* + * This could be an owner or table name. 
+ * But this is also a special case since the table can be + * referenced with a blank owner: + * dbname..tablename + */ + readToken (name); + if (isType (name, TOKEN_OPEN_SQUARE)) + { + readToken (name); + /* Read close square */ + readToken (token); + } + /* Check if a blank name was provided */ + if (isType (name, TOKEN_PERIOD)) + { + readToken (name); + if (isType (name, TOKEN_OPEN_SQUARE)) + { + readToken (name); + /* Read close square */ + readToken (token); + } + } + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + /* This can only be the table name */ + readToken (name); + if (isType (name, TOKEN_OPEN_SQUARE)) + { + readToken (name); + /* Read close square */ + readToken (token); + } + readToken (token); + } + } + if (isType (token, TOKEN_OPEN_PAREN)) + { + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING)) + { + makeSqlTag (name, SQLTAG_TABLE); + vStringCopy(token->scope, name->string); + parseRecord (token); + vStringClear (token->scope); + } + } + else if (isKeyword (token, KEYWORD_at)) + { + if (isType (name, TOKEN_IDENTIFIER)) + { + makeSqlTag (name, SQLTAG_TABLE); + } + } + findCmdTerm (token, FALSE); + deleteToken (name); +} + +static void parseIndex (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + tokenInfo *const owner = newToken (); + + /* + * This deals with these formats + * create index i1 on t1(c1) create index "i2" on t1(c1) + * create virtual unique clustered index "i3" on t1(c1) + * create unique clustered index "i4" on t1(c1) + * create clustered index "i5" on t1(c1) + * create bitmap index "i6" on t1(c1) + */ + + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + readToken (token); + } + if ( isKeyword (token, KEYWORD_on) && + (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING) ) ) + { + readToken (owner); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (owner); + readToken (token); + } + 
addToScope(name, owner->string); + makeSqlTag (name, SQLTAG_INDEX); + } + findCmdTerm (token, FALSE); + deleteToken (name); + deleteToken (owner); +} + +static void parseEvent (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * create event e1 handler begin end; + * create event "e2" handler begin end; + * create event dba."e3" handler begin end; + * create event "dba"."e4" handler begin end; + */ + + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + } + while (! (isKeyword (token, KEYWORD_handler) || + (isType (token, TOKEN_SEMICOLON))) ) + { + readToken (token); + } + + if ( isKeyword (token, KEYWORD_handler) || + isType (token, TOKEN_SEMICOLON) ) + { + makeSqlTag (name, SQLTAG_EVENT); + } + + if (isKeyword (token, KEYWORD_handler)) + { + readToken (token); + if ( isKeyword (token, KEYWORD_begin) ) + { + parseBlock (token, TRUE); + } + findCmdTerm (token, TRUE); + } + deleteToken (name); +} + +static void parseTrigger (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + tokenInfo *const table = newToken (); + + /* + * This deals with these formats + * create or replace trigger tr1 begin end; + * create trigger "tr2" begin end; + * drop trigger "droptr1"; + * create trigger "tr3" CALL sp_something(); + * create trigger "owner"."tr4" begin end; + * create trigger "tr5" not valid; + * create trigger "tr6" begin end; + */ + + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + readToken (token); + } + + while ( !isKeyword (token, KEYWORD_on) && + !isCmdTerm (token) ) + { + readToken (token); + } + + /*if (! isType (token, TOKEN_SEMICOLON) ) */ + if (! isCmdTerm (token) ) + { + readToken (table); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (table); + readToken (token); + } + + while (! 
(isKeyword (token, KEYWORD_begin) || + (isKeyword (token, KEYWORD_call)) || + ( isCmdTerm (token))) ) + { + if ( isKeyword (token, KEYWORD_declare) ) + { + addToScope(token, name->string); + parseDeclare(token, TRUE); + vStringClear(token->scope); + } + else + readToken (token); + } + + if ( isKeyword (token, KEYWORD_begin) || + isKeyword (token, KEYWORD_call) ) + { + addToScope(name, table->string); + makeSqlTag (name, SQLTAG_TRIGGER); + addToScope(token, table->string); + if ( isKeyword (token, KEYWORD_begin) ) + { + parseBlock (token, TRUE); + } + vStringClear(token->scope); + } + } + + findCmdTerm (token, TRUE); + deleteToken (name); + deleteToken (table); +} + +static void parsePublication (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * create or replace publication pu1 () + * create publication "pu2" () + * create publication dba."pu3" () + * create publication "dba"."pu4" () + */ + + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + readToken (token); + } + if (isType (token, TOKEN_OPEN_PAREN)) + { + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING)) + { + makeSqlTag (name, SQLTAG_PUBLICATION); + } + } + findCmdTerm (token, FALSE); + deleteToken (name); +} + +static void parseService (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * CREATE SERVICE s1 TYPE 'HTML' + * AUTHORIZATION OFF USER DBA AS + * SELECT * + * FROM SYS.SYSTABLE; + * CREATE SERVICE "s2" TYPE 'HTML' + * AUTHORIZATION OFF USER DBA AS + * CALL sp_Something(); + */ + + readToken (name); + readToken (token); + if (isKeyword (token, KEYWORD_type)) + { + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING)) + { + makeSqlTag (name, SQLTAG_SERVICE); + } + } + findCmdTerm (token, FALSE); + deleteToken (name); +} + +static void parseDomain (tokenInfo *const token) +{ + tokenInfo *const name = 
newToken (); + + /* + * This deals with these formats + * CREATE DOMAIN|DATATYPE [AS] your_name ...; + */ + + readToken (name); + if (isKeyword (name, KEYWORD_is)) + { + readToken (name); + } + readToken (token); + if (isType (name, TOKEN_IDENTIFIER) || + isType (name, TOKEN_STRING)) + { + makeSqlTag (name, SQLTAG_DOMAIN); + } + findCmdTerm (token, FALSE); + deleteToken (name); +} + +static void parseDrop (tokenInfo *const token) +{ + /* + * This deals with these formats + * DROP TABLE|PROCEDURE|DOMAIN|DATATYPE name; + * + * Just simply skip over these statements. + * They are often confused with PROCEDURE prototypes + * since the syntax is similar, this effectively deals with + * the issue for all types. + */ + + findCmdTerm (token, FALSE); +} + +static void parseVariable (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * create variable varname1 integer; + * create variable @varname2 integer; + * create variable "varname3" integer; + * drop variable @varname3; + * A name directly followed by a semicolon (the drop form) is not tagged. + */ + + readToken (name); + readToken (token); + if ( (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)) + && !isType (token, TOKEN_SEMICOLON) ) + { + makeSqlTag (name, SQLTAG_VARIABLE); + } + findCmdTerm (token, TRUE); + + deleteToken (name); +} + +static void parseSynonym (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with this format + * create synonym syn_name for object_name; + * The name is tagged only when FOR is the token right after it. + */ + + readToken (name); + readToken (token); + if ( (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)) + && isKeyword (token, KEYWORD_for) ) + { + makeSqlTag (name, SQLTAG_SYNONYM); + } + findCmdTerm (token, TRUE); + + deleteToken (name); +} + +static void parseView (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + /* + * This deals with these formats + * create view v1 as select ...; + * create view owner.v2 as select ...; 
+ * create view v3 (c1, c2) as select ...; + * A parenthesised list after the name is skipped. The name is tagged + * only when KEYWORD_is (used for IS|AS elsewhere in this file, e.g. + * parseDomain) is reached before a semicolon. + */ + + readToken (name); + readToken (token); + if (isType (token, TOKEN_PERIOD)) + { + readToken (name); + readToken (token); + } + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + skipArgumentList(token); + + } + + while (!(isKeyword (token, KEYWORD_is) || + isType (token, TOKEN_SEMICOLON) + )) + { + readToken (token); + } + + if ( (isType (name, TOKEN_IDENTIFIER) || isType (name, TOKEN_STRING)) + && isKeyword (token, KEYWORD_is) ) + { + makeSqlTag (name, SQLTAG_VIEW); + } + + findCmdTerm (token, TRUE); + + deleteToken (name); +} + +static void parseMLTable (tokenInfo *const token) +{ + tokenInfo *const version = newToken (); + tokenInfo *const table = newToken (); + tokenInfo *const event = newToken (); + + /* + * This deals with these formats + * call dbo.ml_add_table_script( 'version', 'table_name', 'event', + * 'some SQL statement' + * ); + * The tag is made only when all three leading arguments are strings. + */ + + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + readToken (version); + readToken (token); + while (!(isType (token, TOKEN_COMMA) || + isType (token, TOKEN_CLOSE_PAREN) + )) + { + readToken (token); + } + + if (isType (token, TOKEN_COMMA)) + { + readToken (table); + readToken (token); + while (!(isType (token, TOKEN_COMMA) || + isType (token, TOKEN_CLOSE_PAREN) + )) + { + readToken (token); + } + + if (isType (token, TOKEN_COMMA)) + { + readToken (event); + + if (isType (version, TOKEN_STRING) && + isType (table, TOKEN_STRING) && + isType (event, TOKEN_STRING) ) + { + addToScope(version, table->string); + addToScope(version, event->string); + makeSqlTag (version, SQLTAG_MLTABLE); + } + } + if( !isType (token, TOKEN_CLOSE_PAREN) ) + findToken (token, TOKEN_CLOSE_PAREN); + } + } + + findCmdTerm (token, TRUE); + + deleteToken (version); + deleteToken (table); + deleteToken (event); +} + +static void parseMLConn (tokenInfo *const token) +{ + tokenInfo *const version = newToken 
(); + tokenInfo *const event = newToken (); + + /* + * This deals with these formats + * call ml_add_connection_script( 'version', 'event', + * 'some SQL statement' + * ); + */ + + readToken (token); + if ( isType (token, TOKEN_OPEN_PAREN) ) + { + readToken (version); + readToken (token); + while (!(isType (token, TOKEN_COMMA) || + isType (token, TOKEN_CLOSE_PAREN) + )) + { + readToken (token); + } + + if (isType (token, TOKEN_COMMA)) + { + readToken (event); + + if (isType (version, TOKEN_STRING) && + isType (event, TOKEN_STRING) ) + { + addToScope(version, event->string); + makeSqlTag (version, SQLTAG_MLCONN); + } + } + if( !isType (token, TOKEN_CLOSE_PAREN) ) + findToken (token, TOKEN_CLOSE_PAREN); + + } + + findCmdTerm (token, TRUE); + + deleteToken (version); + deleteToken (event); +} + +static void parseComment (tokenInfo *const token) +{ + /* + * This deals with this statement: + * COMMENT TO PRESERVE FORMAT ON PROCEDURE "DBA"."test" IS + * {create PROCEDURE DBA."test"() + * BEGIN + * signal dave; + * END + * } + * ; + * The comment can contain anything between the CURLY + * braces + * COMMENT ON USER "admin" IS + * 'Administration Group' + * ; + * Or it could be a simple string with no curly braces + */ + while (! 
isKeyword (token, KEYWORD_is)) + { + readToken (token); + } + readToken (token); + if ( isType(token, TOKEN_OPEN_CURLY) ) + { + findToken (token, TOKEN_CLOSE_CURLY); + } + + findCmdTerm (token, TRUE); +} + + +static void parseKeywords (tokenInfo *const token) +{ + switch (token->keyword) + { + case KEYWORD_begin: parseBlock (token, FALSE); break; + case KEYWORD_comment: parseComment (token); break; + case KEYWORD_cursor: parseSimple (token, SQLTAG_CURSOR); break; + case KEYWORD_datatype: parseDomain (token); break; + case KEYWORD_declare: parseBlock (token, FALSE); break; + case KEYWORD_domain: parseDomain (token); break; + case KEYWORD_drop: parseDrop (token); break; + case KEYWORD_event: parseEvent (token); break; + case KEYWORD_function: parseSubProgram (token); break; + case KEYWORD_if: parseStatements (token); break; + case KEYWORD_index: parseIndex (token); break; + case KEYWORD_ml_table: parseMLTable (token); break; + case KEYWORD_ml_table_lang: parseMLTable (token); break; + case KEYWORD_ml_table_dnet: parseMLTable (token); break; + case KEYWORD_ml_table_java: parseMLTable (token); break; + case KEYWORD_ml_table_chk: parseMLTable (token); break; + case KEYWORD_ml_conn: parseMLConn (token); break; + case KEYWORD_ml_conn_lang: parseMLConn (token); break; + case KEYWORD_ml_conn_dnet: parseMLConn (token); break; + case KEYWORD_ml_conn_java: parseMLConn (token); break; + case KEYWORD_ml_conn_chk: parseMLConn (token); break; + case KEYWORD_package: parsePackage (token); break; + case KEYWORD_procedure: parseSubProgram (token); break; + case KEYWORD_publication: parsePublication (token); break; + case KEYWORD_service: parseService (token); break; + case KEYWORD_subtype: parseSimple (token, SQLTAG_SUBTYPE); break; + case KEYWORD_synonym: parseSynonym (token); break; + case KEYWORD_table: parseTable (token); break; + case KEYWORD_trigger: parseTrigger (token); break; + case KEYWORD_type: parseType (token); break; + case KEYWORD_variable: parseVariable (token); 
break; + case KEYWORD_view: parseView (token); break; + default: break; + } +} + +static void parseSqlFile (tokenInfo *const token) +{ + do + { + readToken (token); + + if (isType (token, TOKEN_BLOCK_LABEL_BEGIN)) + parseLabel (token); + else + parseKeywords (token); + } while (! isKeyword (token, KEYWORD_end)); +} + +static void initialize (const langType language) +{ + Assert (sizeof (SqlKinds) / sizeof (SqlKinds [0]) == SQLTAG_COUNT); + Lang_sql = language; + buildSqlKeywordHash (); +} + +static void findSqlTags (void) +{ + tokenInfo *const token = newToken (); + exception_t exception = (exception_t) (setjmp (Exception)); + + while (exception == ExceptionNone) + parseSqlFile (token); + + deleteToken (token); +} + +extern parserDefinition* SqlParser (void) +{ + static const char *const extensions [] = { "sql", NULL }; + parserDefinition* def = parserNew ("SQL"); + def->kinds = SqlKinds; + def->kindCount = KIND_COUNT (SqlKinds); + def->extensions = extensions; + def->parser = findSqlTags; + def->initialize = initialize; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */ diff --git a/strlist.c b/strlist.c new file mode 100644 index 0000000..8797795 --- /dev/null +++ b/strlist.c @@ -0,0 +1,281 @@ +/* +* $Id: strlist.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1999-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* This module contains functions managing resizable string lists. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include +#ifdef HAVE_FNMATCH_H +# include +#endif + +#include "debug.h" +#include "read.h" +#include "routines.h" +#include "strlist.h" + +/* +* FUNCTION DEFINITIONS +*/ + +extern stringList *stringListNew (void) +{ + stringList* const result = xMalloc (1, stringList); + result->max = 0; + result->count = 0; + result->list = NULL; + return result; +} + +extern void stringListAdd (stringList *const current, vString *string) +{ + enum { incrementalIncrease = 10 }; + Assert (current != NULL); + if (current->list == NULL) + { + Assert (current->max == 0); + current->count = 0; + current->max = incrementalIncrease; + current->list = xMalloc (current->max, vString*); + } + else if (current->count == current->max) + { + current->max += incrementalIncrease; + current->list = xRealloc (current->list, current->max, vString*); + } + current->list [current->count++] = string; +} + +extern void stringListRemoveLast (stringList *const current) +{ + Assert (current != NULL); + Assert (current->count > 0); + --current->count; + current->list [current->count] = NULL; +} + +/* Combine list `from' into `current', deleting `from' */ +extern void stringListCombine ( + stringList *const current, stringList *const from) +{ + unsigned int i; + Assert (current != NULL); + Assert (from != NULL); + for (i = 0 ; i < from->count ; ++i) + { + stringListAdd (current, from->list [i]); + from->list [i] = NULL; + } + stringListDelete (from); +} + +extern stringList* stringListNewFromArgv (const char* const* const argv) +{ + stringList* const result = stringListNew (); + const char *const *p; + Assert (argv != NULL); + for (p = argv ; *p != NULL ; ++p) + stringListAdd (result, vStringNewInit (*p)); + return result; +} + +extern stringList* stringListNewFromFile (const char* const fileName) +{ + stringList* result = NULL; + FILE* const fp = fopen (fileName, "r"); + if (fp != NULL) + { + result = stringListNew (); + 
while (! feof (fp)) + { + vString* const str = vStringNew (); + readLine (str, fp); + vStringStripTrailing (str); + if (vStringLength (str) > 0) + stringListAdd (result, str); + else + vStringDelete (str); + } + } + return result; +} + +extern unsigned int stringListCount (const stringList *const current) +{ + Assert (current != NULL); + return current->count; +} + +extern vString* stringListItem ( + const stringList *const current, const unsigned int indx) +{ + Assert (current != NULL); + return current->list [indx]; +} + +extern vString* stringListLast (const stringList *const current) +{ + Assert (current != NULL); + Assert (current->count > 0); + return current->list [current->count - 1]; +} + +extern void stringListClear (stringList *const current) +{ + unsigned int i; + Assert (current != NULL); + for (i = 0 ; i < current->count ; ++i) + { + vStringDelete (current->list [i]); + current->list [i] = NULL; + } + current->count = 0; +} + +extern void stringListDelete (stringList *const current) +{ + if (current != NULL) + { + if (current->list != NULL) + { + stringListClear (current); + eFree (current->list); + current->list = NULL; + } + current->max = 0; + current->count = 0; + eFree (current); + } +} + +static boolean compareString ( + const char *const string, vString *const itm) +{ + return (boolean) (strcmp (string, vStringValue (itm)) == 0); +} + +static boolean compareStringInsensitive ( + const char *const string, vString *const itm) +{ + return (boolean) (strcasecmp (string, vStringValue (itm)) == 0); +} + +static int stringListIndex ( + const stringList *const current, + const char *const string, + boolean (*test)(const char *s, vString *const vs)) +{ + int result = -1; + unsigned int i; + Assert (current != NULL); + Assert (string != NULL); + Assert (test != NULL); + for (i = 0 ; result == -1 && i < current->count ; ++i) + if ((*test)(string, current->list [i])) + result = i; + return result; +} + +extern boolean stringListHas ( + const stringList 
*const current, const char *const string) +{ + boolean result = FALSE; + Assert (current != NULL); + result = stringListIndex (current, string, compareString) != -1; + return result; +} + +extern boolean stringListHasInsensitive ( + const stringList *const current, const char *const string) +{ + boolean result = FALSE; + Assert (current != NULL); + Assert (string != NULL); + result = stringListIndex (current, string, compareStringInsensitive) != -1; + return result; +} + +extern boolean stringListHasTest ( + const stringList *const current, boolean (*test)(const char *s)) +{ + boolean result = FALSE; + unsigned int i; + Assert (current != NULL); + for (i = 0 ; ! result && i < current->count ; ++i) + result = (*test)(vStringValue (current->list [i])); + return result; +} + +extern boolean stringListRemoveExtension ( + stringList* const current, const char* const extension) +{ + boolean result = FALSE; + int where; +#ifdef CASE_INSENSITIVE_FILENAMES + where = stringListIndex (current, extension, compareStringInsensitive); +#else + where = stringListIndex (current, extension, compareString); +#endif + if (where != -1) + { + memmove (current->list + where, current->list + where + 1, + (current->count - where) * sizeof (*current->list)); + current->list [current->count - 1] = NULL; + --current->count; + result = TRUE; + } + return result; +} + +extern boolean stringListExtensionMatched ( + const stringList* const current, const char* const extension) +{ +#ifdef CASE_INSENSITIVE_FILENAMES + return stringListHasInsensitive (current, extension); +#else + return stringListHas (current, extension); +#endif +} + +static boolean fileNameMatched ( + const vString* const vpattern, const char* const fileName) +{ + const char* const pattern = vStringValue (vpattern); +#if defined (HAVE_FNMATCH) + return (boolean) (fnmatch (pattern, fileName, 0) == 0); +#elif defined (CASE_INSENSITIVE_FILENAMES) + return (boolean) (strcasecmp (pattern, fileName) == 0); +#else + return (boolean) 
(strcmp (pattern, fileName) == 0); +#endif +} + +extern boolean stringListFileMatched ( + const stringList* const current, const char* const fileName) +{ + boolean result = FALSE; + unsigned int i; + for (i = 0 ; ! result && i < stringListCount (current) ; ++i) + result = fileNameMatched (stringListItem (current, i), fileName); + return result; +} + +extern void stringListPrint (const stringList *const current) +{ + unsigned int i; + Assert (current != NULL); + for (i = 0 ; i < current->count ; ++i) + printf ("%s%s", (i > 0) ? ", " : "", vStringValue (current->list [i])); +} + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/strlist.h b/strlist.h new file mode 100644 index 0000000..c0d2909 --- /dev/null +++ b/strlist.h @@ -0,0 +1,54 @@ +/* +* $Id: strlist.h 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 1999-2002, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Defines external interface to resizable string lists. 
+*/ +#ifndef _STRLIST_H +#define _STRLIST_H + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include "vstring.h" + +/* +* DATA DECLARATIONS +*/ +typedef struct sStringList { + unsigned int max; + unsigned int count; + vString **list; +} stringList; + +/* +* FUNCTION PROTOTYPES +*/ +extern stringList *stringListNew (void); +extern void stringListAdd (stringList *const current, vString *string); +extern void stringListRemoveLast (stringList *const current); +extern void stringListCombine (stringList *const current, stringList *const from); +extern stringList* stringListNewFromArgv (const char* const* const list); +extern stringList* stringListNewFromFile (const char* const fileName); +extern void stringListClear (stringList *const current); +extern unsigned int stringListCount (const stringList *const current); +extern vString* stringListItem (const stringList *const current, const unsigned int indx); +extern vString* stringListLast (const stringList *const current); +extern void stringListDelete (stringList *const current); +extern boolean stringListHasInsensitive (const stringList *const current, const char *const string); +extern boolean stringListHas (const stringList *const current, const char *const string); +extern boolean stringListHasTest (const stringList *const current, boolean (*test)(const char *s)); +extern boolean stringListRemoveExtension (stringList* const current, const char* const extension); +extern boolean stringListExtensionMatched (const stringList* const list, const char* const extension); +extern boolean stringListFileMatched (const stringList* const list, const char* const str); +extern void stringListPrint (const stringList *const current); + +#endif /* _STRLIST_H */ + +/* vi:set tabstop=4 shiftwidth=4: */ diff --git a/tcl.c b/tcl.c new file mode 100644 index 0000000..b3a3a5b --- /dev/null +++ b/tcl.c @@ -0,0 +1,116 @@ +/* +* $Id: tcl.c 443 2006-05-30 04:37:13Z darren $ +* +* Copyright (c) 2000-2003, Darren 
Hiebert
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions for generating tags for TCL scripts.
+*/
+
+/*
+*   INCLUDE FILES
+*/
+#include "general.h"  /* must always come first */
+
+#include <string.h>
+
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+*   DATA DEFINITIONS
+*/
+typedef enum {
+    K_CLASS, K_METHOD, K_PROCEDURE
+} tclKind;
+
+static kindOption TclKinds [] = {
+    { TRUE, 'c', "class",     "classes" },
+    { TRUE, 'm', "method",    "methods" },
+    { TRUE, 'p', "procedure", "procedures" }
+};
+
+/*
+*   FUNCTION DEFINITIONS
+*/
+
+/*  Copies the whitespace-delimited word starting at "cp" into "name", passes
+ *  it to makeSimpleTag() with the given kind, and returns a pointer to the
+ *  first character following the word.
+ */
+static const unsigned char *makeTclTag (
+        const unsigned char *cp,
+        vString *const name,
+        const tclKind kind)
+{
+    vStringClear (name);
+    while ((int) *cp != '\0'  &&  ! isspace ((int) *cp))
+    {
+        vStringPut (name, (int) *cp);
+        ++cp;
+    }
+    vStringTerminate (name);
+    makeSimpleTag (name, TclKinds, kind);
+    return cp;
+}
+
+/*  Returns TRUE when "line" starts with the complete word "word", i.e. the
+ *  text matches and is followed by whitespace or the end of the line.
+ *  A bare prefix comparison would also accept commands such as "process" or
+ *  "classify" as "proc" / "class" and generate bogus tags.
+ */
+static boolean match (const unsigned char *line, const char *word)
+{
+    const size_t length = strlen (word);
+
+    if (strncmp ((const char*) line, word, length) != 0)
+        return FALSE;
+    /* The prefix matched, so line [length] lies within the string. */
+    return (boolean) (line [length] == '\0'  ||  isspace ((int) line [length]));
+}
+
+/*  Reads the input line by line and tags the word following "proc",
+ *  "class" / "itcl::class", and "public|protected|private method".
+ *  Blank lines and lines starting with '#' are ignored.
+ */
+static void findTclTags (void)
+{
+    vString *name = vStringNew ();
+    const unsigned char *line;
+
+    while ((line = fileReadLine ()) != NULL)
+    {
+        const unsigned char *cp;
+
+        while (isspace (line [0]))
+            ++line;
+
+        if (line [0] == '\0'  ||  line [0] == '#')
+            continue;
+
+        /* read first word */
+        for (cp = line ; *cp != '\0'  &&  ! isspace ((int) *cp) ; ++cp)
+            ;
+        if (! isspace ((int) *cp))
+            continue;
+        while (isspace ((int) *cp))
+            ++cp;
+        /* Now `line' points at first word and `cp' points at next word */
+
+        if (match (line, "proc"))
+            cp = makeTclTag (cp, name, K_PROCEDURE);
+        else if (match (line, "class") || match (line, "itcl::class"))
+            cp = makeTclTag (cp, name, K_CLASS);
+        else if (match (line, "public") ||
+                match (line, "protected") ||
+                match (line, "private"))
+        {
+            if (match (cp, "method"))
+            {
+                cp += 6;  /* length of "method" */
+                while (isspace ((int) *cp))
+                    ++cp;
+                cp = makeTclTag (cp, name, K_METHOD);
+            }
+        }
+    }
+    vStringDelete (name);
+}
+
+/*  Builds the parser definition for Tcl: kinds, file extensions and the
+ *  line-based parser entry point.
+ */
+extern parserDefinition* TclParser (void)
+{
+    static const char *const extensions [] = { "tcl", "tk", "wish", "itcl", NULL };
+    parserDefinition* def = parserNew ("Tcl");
+    def->kinds      = TclKinds;
+    def->kindCount  = KIND_COUNT (TclKinds);
+    def->extensions = extensions;
+    def->parser     = findTclTags;
+    return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/tex.c b/tex.c
new file mode 100644
index 0000000..a285797
--- /dev/null
+++ b/tex.c
@@ -0,0 +1,524 @@
+/*
+ *  $Id: tex.c 666 2008-05-15 17:47:31Z dfishburn $
+ *
+ *  Copyright (c) 2008, David Fishburn
+ *
+ *  This source code is released for free distribution under the terms of the
+ *  GNU General Public License.
+ *
+ *  This module contains functions for generating tags for TeX language files.
+ *
+ *  Tex language reference:
+ *      http://en.wikibooks.org/wiki/TeX#The_Structure_of_TeX
+ */
+
+/*
+ *  INCLUDE FILES
+ */
+#include "general.h"    /* must always come first */
+#include <ctype.h>      /* to define isalpha () */
+#include <setjmp.h>     /* jmp_buf, setjmp (), longjmp () */
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+ *  MACROS
+ */
+/* Both macros take a pointer to a tokenInfo and yield a boolean. */
+#define isType(token,t)     (boolean) ((token)->type == (t))
+#define isKeyword(token,k)  (boolean) ((token)->keyword == (k))
+
+/*
+ *  DATA DECLARATIONS
+ */
+
+/* Value passed through longjmp () when the tokenizer hits end of file. */
+typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
+
+/*
+ * Used to specify type of keyword.
+ */
+typedef enum eKeywordId {
+    KEYWORD_NONE = -1,
+    KEYWORD_chapter,
+    KEYWORD_section,
+    KEYWORD_subsection,
+    KEYWORD_subsubsection,
+    KEYWORD_part,
+    KEYWORD_paragraph,
+    KEYWORD_subparagraph
+} keywordId;
+
+/*  Used to determine whether keyword is valid for the token language and
+ *  what its ID is.
+ */
+typedef struct sKeywordDesc {
+    const char *name;
+    keywordId id;
+} keywordDesc;
+
+typedef enum eTokenType {
+    TOKEN_UNDEFINED,
+    TOKEN_CHARACTER,
+    TOKEN_CLOSE_PAREN,
+    TOKEN_SEMICOLON,
+    TOKEN_COLON,
+    TOKEN_COMMA,
+    TOKEN_KEYWORD,
+    TOKEN_OPEN_PAREN,
+    TOKEN_OPERATOR,
+    TOKEN_IDENTIFIER,
+    TOKEN_STRING,
+    TOKEN_PERIOD,
+    TOKEN_OPEN_CURLY,
+    TOKEN_CLOSE_CURLY,
+    TOKEN_EQUAL_SIGN,
+    TOKEN_EXCLAMATION,
+    TOKEN_FORWARD_SLASH,
+    TOKEN_OPEN_SQUARE,
+    TOKEN_CLOSE_SQUARE,
+    TOKEN_OPEN_MXML,
+    TOKEN_CLOSE_MXML,
+    TOKEN_CLOSE_SGML,
+    TOKEN_LESS_THAN,
+    TOKEN_GREATER_THAN,
+    TOKEN_QUESTION_MARK,
+    TOKEN_STAR
+} tokenType;
+
+typedef struct sTokenInfo {
+    tokenType       type;
+    keywordId       keyword;
+    vString *       string;
+    vString *       scope;
+    unsigned long   lineNumber;
+    fpos_t          filePosition;
+} tokenInfo;
+
+/*
+ *  DATA DEFINITIONS
+ */
+
+static langType Lang_js;
+
+static jmp_buf Exception;
+
+/* Indexes into TexKinds; TEXTAG_COUNT must equal the table size. */
+typedef enum {
+    TEXTAG_CHAPTER,
+    TEXTAG_SECTION,
+    TEXTAG_SUBSECTION,
+    TEXTAG_SUBSUBSECTION,
+    TEXTAG_PART,
+    TEXTAG_PARAGRAPH,
+    TEXTAG_SUBPARAGRAPH,
+    TEXTAG_COUNT
+} texKind;
+
+static kindOption TexKinds [] = {
+    { TRUE,  'c', "chapter",          "chapters"         },
+    { TRUE,  's', "section",          "sections"         },
+    { TRUE,  'u', "subsection",       "subsections"      },
+    { TRUE,  'b', "subsubsection",    "subsubsections"   },
+    { TRUE,  'p', "part",             "parts"            },
+    { TRUE,  'P', "paragraph",        "paragraphs"       },
+    { TRUE,  'G', "subparagraph",     "subparagraphs"    }
+};
+
+static const keywordDesc TexKeywordTable [] = {
+    /* keyword          keyword ID */
+    { "chapter",        KEYWORD_chapter         },
+    { "section",        KEYWORD_section         },
+    { "subsection",     KEYWORD_subsection      },
+    { "subsubsection",  KEYWORD_subsubsection   },
+    { "part",           KEYWORD_part            },
+    { "paragraph",      KEYWORD_paragraph       },
+    { "subparagraph",   KEYWORD_subparagraph    }
+};
+
+/*
+ *  FUNCTION DEFINITIONS
+ */
+
+/* Returns TRUE for characters accepted inside an identifier token. */
+static boolean isIdentChar (const int c)
+{
+    return (boolean)
+        (isalpha (c) || isdigit (c) || c == '$' ||
+         c == '_' || c == '#');
+}
+
+/* Registers every entry of TexKeywordTable for the language in Lang_js. */
+static void buildTexKeywordHash (void)
+{
+    const size_t count = sizeof (TexKeywordTable) /
+                         sizeof (TexKeywordTable [0]);
+    size_t i;
+    for (i = 0  ;  i < count  ;  ++i)
+    {
+        const keywordDesc* const p = &TexKeywordTable [i];
+        addKeyword (p->name, Lang_js, (int) p->id);
+    }
+}
+
+/*  Allocates a token with empty string/scope buffers, stamped with the
+ *  current source line and input position.  Release with deleteToken ().
+ */
+static tokenInfo *newToken (void)
+{
+    tokenInfo *const token = xMalloc (1, tokenInfo);
+
+    token->type         = TOKEN_UNDEFINED;
+    token->keyword      = KEYWORD_NONE;
+    token->string       = vStringNew ();
+    token->scope        = vStringNew ();
+    token->lineNumber   = getSourceLineNumber ();
+    token->filePosition = getInputFilePosition ();
+
+    return token;
+}
+
+/* Frees a token created by newToken () together with its two strings. */
+static void deleteToken (tokenInfo *const token)
+{
+    vStringDelete (token->string);
+    vStringDelete (token->scope);
+    eFree (token);
+}
+
+/*
+ *  Tag generation functions
+ */
+
+/*  Emits a tag entry named after token->string at the token's recorded
+ *  position, provided the kind is enabled.
+ */
+static void makeConstTag (tokenInfo *const token, const texKind kind)
+{
+    if (TexKinds [kind].enabled )
+    {
+        const char *const name = vStringValue (token->string);
+        tagEntryInfo e;
+        initTagEntry (&e, name);
+
+        e.lineNumber   = token->lineNumber;
+        e.filePosition = token->filePosition;
+        e.kindName     = TexKinds [kind].name;
+        e.kind         = TexKinds [kind].letter;
+
+        makeTagEntry (&e);
+    }
+}
+
+/*  Like makeConstTag (), but first rewrites token->string to
+ *  "<scope>.<string>" when the token carries a non-empty scope.
+ */
+static void makeTexTag (tokenInfo *const token, texKind kind)
+{
+    vString *   fulltag;
+
+    if (TexKinds [kind].enabled)
+    {
+        /*
+         * If a scope has been added to the token, change the token
+         * string to include the scope when making the tag.
+         */
+        if ( vStringLength (token->scope) > 0 )
+        {
+            fulltag = vStringNew ();
+            vStringCopy (fulltag, token->scope);
+            vStringCatS (fulltag, ".");
+            vStringCatS (fulltag, vStringValue (token->string));
+            vStringTerminate (fulltag);
+            vStringCopy (token->string, fulltag);
+            vStringDelete (fulltag);
+        }
+        makeConstTag (token, kind);
+    }
+}
+
+/*
+ *  Parsing functions
+ */
+
+/*  Appends input characters to "string" until the closing "delimiter" or
+ *  end of file.  A backslash causes the following character to be taken
+ *  literally (the backslash itself is dropped).
+ */
+static void parseString (vString *const string, const int delimiter)
+{
+    boolean end = FALSE;
+    while (! end)
+    {
+        int c = fileGetc ();
+        if (c == EOF)
+            end = TRUE;
+        else if (c == '\\')
+        {
+            c = fileGetc (); /* This may be a ' or ". */
+            if (c == EOF)
+                end = TRUE;  /* do not store EOF as a character */
+            else
+                vStringPut (string, c);
+        }
+        else if (c == delimiter)
+            end = TRUE;
+        else
+            vStringPut (string, c);
+    }
+    vStringTerminate (string);
+}
+
+/*
+ *  Read a C identifier beginning with "firstChar" and places it into
+ *  "name".
+ */
+static void parseIdentifier (vString *const string, const int firstChar)
+{
+    int c = firstChar;
+    Assert (isIdentChar (c));
+    do
+    {
+        vStringPut (string, c);
+        c = fileGetc ();
+    } while (isIdentChar (c));
+
+    vStringTerminate (string);
+    if (!isspace (c))
+        fileUngetc (c);     /* unget non-identifier character */
+}
+
+/*  Reads the next token into "token", skipping blanks, tabs, newlines and
+ *  '%' comments.  Does not return at end of file: it longjmps to Exception
+ *  with ExceptionEOF instead.
+ */
+static void readToken (tokenInfo *const token)
+{
+    int c;
+
+    token->type         = TOKEN_UNDEFINED;
+    token->keyword      = KEYWORD_NONE;
+    vStringClear (token->string);
+
+getNextChar:
+    do
+    {
+        c = fileGetc ();
+        token->lineNumber   = getSourceLineNumber ();
+        token->filePosition = getInputFilePosition ();
+    }
+    while (c == '\t'  ||  c == ' ' ||  c == '\n');
+
+    switch (c)
+    {
+        case EOF: longjmp (Exception, (int)ExceptionEOF);   break;
+        case '(': token->type = TOKEN_OPEN_PAREN;           break;
+        case ')': token->type = TOKEN_CLOSE_PAREN;          break;
+        case ';': token->type = TOKEN_SEMICOLON;            break;
+        case ',': token->type = TOKEN_COMMA;                break;
+        case '.': token->type = TOKEN_PERIOD;               break;
+        case ':': token->type = TOKEN_COLON;                break;
+        case '{': token->type = TOKEN_OPEN_CURLY;           break;
+        case '}': token->type = TOKEN_CLOSE_CURLY;          break;
+        case '=': token->type = TOKEN_EQUAL_SIGN;           break;
+        case '[': token->type = TOKEN_OPEN_SQUARE;          break;
+        case ']': token->type = TOKEN_CLOSE_SQUARE;         break;
+        case '?': token->type = TOKEN_QUESTION_MARK;        break;
+        case '*': token->type = TOKEN_STAR;                 break;
+
+        case '\'':
+        case '"':
+                  token->type = TOKEN_STRING;
+                  parseString (token->string, c);
+                  token->lineNumber = getSourceLineNumber ();
+                  token->filePosition = getInputFilePosition ();
+                  break;
+
+        case '\\':
+                  /*
+                   * All Tex tags start with a backslash.
+                   * Check if the next character is an alpha character
+                   * else it is not a potential tex tag.
+                   */
+                  c = fileGetc ();
+                  if (! isalpha (c))
+                      fileUngetc (c);
+                  else
+                  {
+                      parseIdentifier (token->string, c);
+                      token->lineNumber = getSourceLineNumber ();
+                      token->filePosition = getInputFilePosition ();
+                      token->keyword = analyzeToken (token->string, Lang_js);
+                      if (isKeyword (token, KEYWORD_NONE))
+                          token->type = TOKEN_IDENTIFIER;
+                      else
+                          token->type = TOKEN_KEYWORD;
+                  }
+                  break;
+
+        case '%':
+                  fileSkipToCharacter ('\n'); /* % are single line comments */
+                  goto getNextChar;
+                  break;
+
+        default:
+                  if (! isIdentChar (c))
+                      token->type = TOKEN_UNDEFINED;
+                  else
+                  {
+                      parseIdentifier (token->string, c);
+                      token->lineNumber = getSourceLineNumber ();
+                      token->filePosition = getInputFilePosition ();
+                      token->type = TOKEN_IDENTIFIER;
+                  }
+                  break;
+    }
+}
+
+/* Copies every field of "src" into "dest"; the strings are deep-copied. */
+static void copyToken (tokenInfo *const dest, tokenInfo *const src)
+{
+    dest->lineNumber = src->lineNumber;
+    dest->filePosition = src->filePosition;
+    dest->type = src->type;
+    dest->keyword = src->keyword;
+    vStringCopy (dest->string, src->string);
+    vStringCopy (dest->scope, src->scope);
+}
+
+/*
+ *  Scanning functions
+ */
+
+/*  Called with "token" positioned on a sectioning keyword.  Builds the tag
+ *  name from the identifier tokens of the optional [short description] if
+ *  one is present, otherwise from those inside the {...} argument, and
+ *  emits a tag of the given kind.  Always returns TRUE.
+ *
+ *  NOTE(review): readToken () longjmps on end of file, in which case
+ *  "name" and "fullname" are not released.
+ */
+static boolean parseTag (tokenInfo *const token, texKind kind)
+{
+    tokenInfo *const name = newToken ();
+    vString *   fullname;
+    boolean     useLongName = TRUE;
+
+    fullname = vStringNew ();
+    vStringClear (fullname);
+
+    /*
+     * Tex tags are of these formats:
+     *   \keyword{any number of words}
+     *   \keyword[short desc]{any number of words}
+     *   \keyword*[short desc]{any number of words}
+     *
+     * When a keyword is found, loop through all words within
+     * the curly braces for the tag name.
+     */
+
+    if (isType (token, TOKEN_KEYWORD))
+    {
+        copyToken (name, token);
+        readToken (token);
+    }
+
+    /*
+     * In the starred form the star precedes the optional short
+     * description, so it has to be skipped before looking for '['.
+     */
+    if (isType (token, TOKEN_STAR))
+    {
+        readToken (token);
+    }
+
+    if (isType (token, TOKEN_OPEN_SQUARE))
+    {
+        useLongName = FALSE;
+
+        readToken (token);
+        while (! isType (token, TOKEN_CLOSE_SQUARE) )
+        {
+            if (isType (token, TOKEN_IDENTIFIER))
+            {
+                if (fullname->length > 0)
+                    vStringCatS (fullname, " ");
+                vStringCatS (fullname, vStringValue (token->string));
+            }
+            readToken (token);
+        }
+        vStringTerminate (fullname);
+        vStringCopy (name->string, fullname);
+        makeTexTag (name, kind);
+    }
+
+    /* Still accept a star that follows the short description. */
+    if (isType (token, TOKEN_STAR))
+    {
+        readToken (token);
+    }
+
+    if (isType (token, TOKEN_OPEN_CURLY))
+    {
+        readToken (token);
+        while (! isType (token, TOKEN_CLOSE_CURLY) )
+        {
+            if (isType (token, TOKEN_IDENTIFIER) && useLongName)
+            {
+                if (fullname->length > 0)
+                    vStringCatS (fullname, " ");
+                vStringCatS (fullname, vStringValue (token->string));
+            }
+            readToken (token);
+        }
+        if (useLongName)
+        {
+            vStringTerminate (fullname);
+            vStringCopy (name->string, fullname);
+            makeTexTag (name, kind);
+        }
+    }
+
+    deleteToken (name);
+    vStringDelete (fullname);
+    return TRUE;
+}
+
+/*  Token loop: dispatches each sectioning keyword to parseTag () with the
+ *  matching tag kind.  The loop never ends by itself; it is left through
+ *  the longjmp performed by readToken () at end of file.
+ */
+static void parseTexFile (tokenInfo *const token)
+{
+    do
+    {
+        readToken (token);
+
+        if (isType (token, TOKEN_KEYWORD))
+        {
+            switch (token->keyword)
+            {
+                case KEYWORD_chapter:
+                    parseTag (token, TEXTAG_CHAPTER);
+                    break;
+                case KEYWORD_section:
+                    parseTag (token, TEXTAG_SECTION);
+                    break;
+                case KEYWORD_subsection:
+                    parseTag (token, TEXTAG_SUBSECTION);
+                    break;
+                case KEYWORD_subsubsection:
+                    parseTag (token, TEXTAG_SUBSUBSECTION);
+                    break;
+                case KEYWORD_part:
+                    parseTag (token, TEXTAG_PART);
+                    break;
+                case KEYWORD_paragraph:
+                    parseTag (token, TEXTAG_PARAGRAPH);
+                    break;
+                case KEYWORD_subparagraph:
+                    parseTag (token, TEXTAG_SUBPARAGRAPH);
+                    break;
+                default:
+                    break;
+            }
+        }
+    } while (TRUE);
+}
+
+/* Parser initialisation hook: records the language id and loads keywords. */
+static void initialize (const langType language)
+{
+    Assert (sizeof (TexKinds) / sizeof (TexKinds [0]) == TEXTAG_COUNT);
+    Lang_js = language;
+    buildTexKeywordHash ();
+}
+
+/*  Parser entry point.  parseTexFile () runs until readToken () longjmps
+ *  back here with ExceptionEOF, after which the token is released.
+ */
+static void findTexTags (void)
+{
+    tokenInfo *const token = newToken ();
+    exception_t exception;
+
+    exception = (exception_t) (setjmp (Exception));
+    while (exception == ExceptionNone)
+        parseTexFile (token);
+
+    deleteToken (token);
+}
+
+/* Create parser definition structure */
+extern parserDefinition* TexParser (void)
+{
+    static const char *const extensions [] = { "tex", NULL };
+    parserDefinition *const def = parserNew ("Tex");
+    def->extensions = extensions;
+    /*
+     * New definitions for parsing instead of regex
+     */
+    def->kinds      = TexKinds;
+    def->kindCount  = KIND_COUNT (TexKinds);
+    def->parser     = findTexTags;
+    def->initialize = initialize;
+
+    return def;
+}
+/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */
diff --git a/verilog.c b/verilog.c
new file mode 100644
index 0000000..814f5b0
--- /dev/null
+++ b/verilog.c
@@ -0,0 +1,340 @@
+/*
+*   $Id: verilog.c 573 2007-06-26 05:41:27Z elliotth $
+*
+*   Copyright (c) 2003, Darren Hiebert
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions for generating tags for the Verilog HDL
+*   (Hardware Description Language).
+*
+*   Language definition documents:
+*       http://www.eg.bucknell.edu/~cs320/verilog/verilog-manual.html
+*       http://www.sutherland-hdl.com/on-line_ref_guide/vlog_ref_top.html
+*       http://www.verilog.com/VerilogBNF.html
+*       http://eesun.free.fr/DOC/VERILOG/verilog_manual1.html
+*/
+
+/*
+ *   INCLUDE FILES
+ */
+#include "general.h"  /* must always come first */
+
+#include <string.h>
+#include <setjmp.h>
+
+#include "debug.h"
+#include "get.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "vstring.h"
+
+/*
+ *   DATA DECLARATIONS
+ */
+/* Value passed through longjmp () when vGetc () reaches end of file. */
+typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
+
+/* Indexes into VerilogKinds; also stored as the keyword value. */
+typedef enum {
+    K_UNDEFINED = -1,
+    K_CONSTANT,
+    K_EVENT,
+    K_FUNCTION,
+    K_MODULE,
+    K_NET,
+    K_PORT,
+    K_REGISTER,
+    K_TASK
+} verilogKind;
+
+typedef struct {
+    const char *keyword;
+    verilogKind kind;
+} keywordAssoc;
+
+/*
+ *   DATA DEFINITIONS
+ */
+static int Ungetc;          /* one-character pushback; '\0' means empty */
+static int Lang_verilog;
+static jmp_buf Exception;
+
+static kindOption VerilogKinds [] = {
+ { TRUE, 'c', "constant",  "constants (define, parameter, specparam)" },
+ { TRUE, 'e', "event",     "events" },
+ { TRUE, 'f', "function",  "functions" },
+ { TRUE, 'm', "module",    "modules" },
+ { TRUE, 'n', "net",       "net data types" },
+ { TRUE, 'p', "port",      "ports" },
+ { TRUE, 'r', "register",  "register data types" },
+ { TRUE, 't', "task",      "tasks" }
+};
+
+static keywordAssoc VerilogKeywordTable [] = {
+    { "`define",   K_CONSTANT },
+    { "event",     K_EVENT },
+    { "function",  K_FUNCTION },
+    { "inout",     K_PORT },
+    { "input",     K_PORT },
+    { "integer",   K_REGISTER },
+    { "module",    K_MODULE },
+    { "output",    K_PORT },
+    { "parameter", K_CONSTANT },
+    { "real",      K_REGISTER },
+    { "realtime",  K_REGISTER },
+    { "reg",       K_REGISTER },
+    { "specparam", K_CONSTANT },
+    { "supply0",   K_NET },
+    { "supply1",   K_NET },
+    { "task",      K_TASK },
+    { "time",      K_REGISTER },
+    { "tri0",      K_NET },
+    { "tri1",      K_NET },
+    { "triand",    K_NET },
+    { "tri",       K_NET },
+    { "trior",     K_NET },
+    { "trireg",    K_NET },
+    { "wand",      K_NET },
+    { "wire",      K_NET },
+    { "wor",       K_NET }
+};
+
+/*
+ *   FUNCTION DEFINITIONS
+ */
+
+/*  Parser initialisation hook: records the language id and registers every
+ *  keyword with its tag kind as the keyword value.
+ */
+static void initialize (const langType language)
+{
+    size_t i;
+    const size_t count =
+            sizeof (VerilogKeywordTable) / sizeof (VerilogKeywordTable [0]);
+    Lang_verilog = language;
+    for (i = 0  ;  i < count  ;  ++i)
+    {
+        const keywordAssoc* const p = &VerilogKeywordTable [i];
+        addKeyword (p->keyword, language, (int) p->kind);
+    }
+}
+
+/* Pushes back a single character for the next vGetc () call. */
+static void vUngetc (int c)
+{
+    Assert (Ungetc == '\0');
+    Ungetc = c;
+}
+
+/*  Returns the next input character with comments and string contents
+ *  removed: a line comment yields its terminating newline, a block comment
+ *  yields whatever skipOverCComment () returns, and a whole string literal
+ *  is replaced by the single character '@'.  Does not return at end of
+ *  file; it longjmps to Exception with ExceptionEOF.
+ */
+static int vGetc (void)
+{
+    int c;
+    if (Ungetc == '\0')
+        c = fileGetc ();
+    else
+    {
+        c = Ungetc;
+        Ungetc = '\0';
+    }
+    if (c == '/')
+    {
+        int c2 = fileGetc ();
+        if (c2 == EOF)
+            longjmp (Exception, (int) ExceptionEOF);
+        else if (c2 == '/')  /* strip comment until end-of-line */
+        {
+            do
+                c = fileGetc ();
+            while (c != '\n'  &&  c != EOF);
+        }
+        else if (c2 == '*')  /* strip block comment */
+        {
+            c = skipOverCComment();
+        }
+        else
+        {
+            fileUngetc (c2);
+        }
+    }
+    else if (c == '"')  /* strip string contents */
+    {
+        int c2;
+        do
+            c2 = fileGetc ();
+        while (c2 != '"'  &&  c2 != EOF);
+        c = '@';
+    }
+    if (c == EOF)
+        longjmp (Exception, (int) ExceptionEOF);
+    return c;
+}
+
+/* Identifier characters: alphanumerics, '_' and the directive prefix '`'. */
+static boolean isIdentifierCharacter (const int c)
+{
+    return (boolean)(isalnum (c)  ||  c == '_'  ||  c == '`');
+}
+
+/* Returns "c", or the first non-whitespace character read after it. */
+static int skipWhite (int c)
+{
+    while (isspace (c))
+        c = vGetc ();
+    return c;
+}
+
+/*  Called after the opening character pair [0] has been consumed.  Skips
+ *  to the matching closing character pair [1], honouring nesting, and
+ *  returns the character that follows it.
+ */
+static int skipPastMatch (const char *const pair)
+{
+    const int begin = pair [0], end = pair [1];
+    int matchLevel = 1;
+    int c;
+    do
+    {
+        c = vGetc ();
+        if (c == begin)
+            ++matchLevel;
+        else if (c == end)
+            --matchLevel;
+    }
+    while (matchLevel > 0);
+    return vGetc ();
+}
+
+/*  Reads the identifier starting with "c" into "name" and pushes back the
+ *  character that ended it.  Returns TRUE when an identifier was read; if
+ *  "c" cannot start one, "name" is left empty and nothing is pushed back.
+ */
+static boolean readIdentifier (vString *const name, int c)
+{
+    vStringClear (name);
+    if (isIdentifierCharacter (c))
+    {
+        while (isIdentifierCharacter (c))
+        {
+            vStringPut (name, c);
+            c = vGetc ();
+        }
+        vUngetc (c);
+        vStringTerminate (name);
+    }
+    return (boolean)(name->length > 0);
+}
+
+/*  Tags a comma-separated list of declared names, starting with the
+ *  identifier whose first character is "c".  A range after a name and an
+ *  "= value" initialiser are skipped.  The character that ended the list
+ *  is pushed back.
+ */
+static void tagNameList (const verilogKind kind, int c)
+{
+    vString *name = vStringNew ();
+    boolean repeat;
+    Assert (isIdentifierCharacter (c));
+    do
+    {
+        repeat = FALSE;
+        if (isIdentifierCharacter (c))
+        {
+            readIdentifier (name, c);
+            makeSimpleTag (name, VerilogKinds, kind);
+        }
+        else
+            break;
+        c = skipWhite (vGetc ());
+        if (c == '[')
+            c = skipPastMatch ("[]");
+        c = skipWhite (c);
+        if (c == '=')
+        {
+            /*
+             * Look at the first character of the value, not at the '='
+             * itself.  A brace-enclosed value is skipped as a unit so that
+             * the commas inside it are not taken for name separators.
+             */
+            c = skipWhite (vGetc ());
+            if (c == '{')
+                c = skipPastMatch ("{}");
+            while (c != ','  &&  c != ';')
+                c = vGetc ();
+        }
+        if (c == ',')
+        {
+            c = skipWhite (vGetc ());
+            repeat = TRUE;
+        }
+        else
+            repeat = FALSE;
+    } while (repeat);
+    vStringDelete (name);
+    vUngetc (c);
+}
+
+/*  Looks "name" up as a keyword.  For `define the following identifier is
+ *  tagged and the rest of the line skipped; for any other known keyword an
+ *  optional (...), [...] and #(...) are skipped and the name list that
+ *  follows is tagged with the keyword's kind.
+ */
+static void findTag (vString *const name)
+{
+    const verilogKind kind = (verilogKind) lookupKeyword (vStringValue (name), Lang_verilog);
+    if (kind == K_CONSTANT && vStringItem (name, 0) == '`')
+    {
+        /* Bug #961001: Verilog compiler directives are line-based. */
+        int c = skipWhite (vGetc ());
+        readIdentifier (name, c);
+        makeSimpleTag (name, VerilogKinds, kind);
+        /* Skip the rest of the line. */
+        do {
+            c = vGetc();
+        } while (c != '\n');
+        vUngetc (c);
+    }
+    else if (kind != K_UNDEFINED)
+    {
+        int c = skipWhite (vGetc ());
+
+        /* Many keywords can have bit width.
+         *   reg [3:0] net_name;
+         *   inout [(`DBUSWIDTH-1):0] databus;
+         */
+        if (c == '(')
+            c = skipPastMatch ("()");
+        c = skipWhite (c);
+        if (c == '[')
+            c = skipPastMatch ("[]");
+        c = skipWhite (c);
+        if (c == '#')
+        {
+            c = vGetc ();
+            if (c == '(')
+                c = skipPastMatch ("()");
+        }
+        c = skipWhite (c);
+        if (isIdentifierCharacter (c))
+            tagNameList (kind, c);
+    }
+}
+
+/*  Parser entry point.  Only the first word of a statement (after ';' or
+ *  a newline) is examined as a possible keyword.  The loop is left through
+ *  the longjmp performed by vGetc () at end of file; the locals that must
+ *  survive it are volatile.
+ */
+static void findVerilogTags (void)
+{
+    vString *const name = vStringNew ();
+    volatile boolean newStatement = TRUE;
+    volatile int c = '\0';
+    exception_t exception = (exception_t) setjmp (Exception);
+
+    if (exception == ExceptionNone) while (c != EOF)
+    {
+        c = vGetc ();
+        switch (c)
+        {
+            case ';':
+            case '\n':
+                newStatement = TRUE;
+                break;
+
+            case ' ':
+            case '\t':
+                break;
+
+            default:
+                if (newStatement && readIdentifier (name, c))
+                    findTag (name);
+                newStatement = FALSE;
+                break;
+        }
+    }
+    vStringDelete (name);
+}
+
+/* Builds the parser definition for Verilog (extension "v"). */
+extern parserDefinition* VerilogParser (void)
+{
+    static const char *const extensions [] = { "v", NULL };
+    parserDefinition* def = parserNew ("Verilog");
+    def->kinds      = VerilogKinds;
+    def->kindCount  = KIND_COUNT (VerilogKinds);
+    def->extensions = extensions;
+    def->parser     = findVerilogTags;
+    def->initialize = initialize;
+    return def;
+}
+
+/* vi:set tabstop=4 shiftwidth=4: */
diff --git a/vhdl.c b/vhdl.c
new file mode 100644
index 0000000..994d2e1
--- /dev/null
+++ b/vhdl.c
@@ -0,0 +1,835 @@
+/*
+*   $Id: vhdl.c 652 2008-04-18 03:51:47Z elliotth $
+*
+*   Copyright (c) 2008, Nicolas Vincent
+*
+*   This source code is released for free distribution under the terms of the
+*   GNU General Public License.
+*
+*   This module contains functions for generating tags for VHDL files.
+*/
+
+/*
+ *   INCLUDE FILES
+ */
+#include "general.h"    /* must always come first */
+
+#include <ctype.h>      /* to define isalpha () */
+#include <string.h>
+#include <setjmp.h>
+
+#include "debug.h"
+#include "entry.h"
+#include "keyword.h"
+#include "parse.h"
+#include "read.h"
+#include "routines.h"
+#include "vstring.h"
+
+/*
+ *   MACROS
+ */
+/* Both macros take a pointer to a tokenInfo and yield a boolean. */
+#define isType(token,t)     (boolean) ((token)->type == (t))
+#define isKeyword(token,k)  (boolean) ((token)->keyword == (k))
+
+/*
+ *   DATA DECLARATIONS
+ */
+/* Value passed through longjmp () when the tokenizer hits end of file. */
+typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
+
+/*
+ * Used to specify type of keyword.
+ */
+typedef enum eKeywordId {
+    KEYWORD_NONE = -1,
+    KEYWORD_ABS,
+    KEYWORD_ACCESS,
+    KEYWORD_AFTER,
+    KEYWORD_ALIAS,
+    KEYWORD_ALL,
+    KEYWORD_AND,
+    KEYWORD_ARCHITECTURE,
+    KEYWORD_ARRAY,
+    KEYWORD_ASSERT,
+    KEYWORD_ATTRIBUTE,
+    KEYWORD_BEGIN,
+    KEYWORD_BLOCK,
+    KEYWORD_BODY,
+    KEYWORD_BUFFER,
+    KEYWORD_BUS,
+    KEYWORD_CASE,
+    KEYWORD_COMPONENT,
+    KEYWORD_CONFIGURATION,
+    KEYWORD_CONSTANT,
+    KEYWORD_DISCONNECT,
+    KEYWORD_DOWNTO,
+    KEYWORD_ELSE,
+    KEYWORD_ELSIF,
+    KEYWORD_END,
+    KEYWORD_ENTITY,
+    KEYWORD_EXIT,
+    KEYWORD_FILE,
+    KEYWORD_FOR,
+    KEYWORD_FUNCTION,
+    KEYWORD_GENERATE,
+    KEYWORD_GENERIC,
+    KEYWORD_GROUP,
+    KEYWORD_GUARDED,
+    KEYWORD_IF,
+    KEYWORD_IMPURE,
+    KEYWORD_IN,
+    KEYWORD_INERTIAL,
+    KEYWORD_INOUT,
+    KEYWORD_IS,
+    KEYWORD_LABEL,
+    KEYWORD_LIBRARY,
+    KEYWORD_LINKAGE,
+    KEYWORD_LITERAL,
+    KEYWORD_LOOP,
+    KEYWORD_MAP,
+    KEYWORD_MOD,
+    KEYWORD_NAND,
+    KEYWORD_NEW,
+    KEYWORD_NEXT,
+    KEYWORD_NOR,
+    KEYWORD_NOT,
+    KEYWORD_NULL,
+    KEYWORD_OF,
+    KEYWORD_ON,
+    KEYWORD_OPEN,
+    KEYWORD_OR,
+    KEYWORD_OTHERS,
+    KEYWORD_OUT,
+    KEYWORD_PACKAGE,
+    KEYWORD_PORT,
+    KEYWORD_POSTPONED,
+    KEYWORD_PROCEDURE,
+    KEYWORD_PROCESS,
+    KEYWORD_PURE,
+    KEYWORD_RANGE,
+    KEYWORD_RECORD,
+    KEYWORD_REGISTER,
+    KEYWORD_REJECT,
+    KEYWORD_RETURN,
+    KEYWORD_ROL,
+    KEYWORD_ROR,
+    KEYWORD_SELECT,
+    KEYWORD_SEVERITY,
+    KEYWORD_SIGNAL,
+    KEYWORD_SHARED,
+    KEYWORD_SLA,
+    KEYWORD_SLI,
+    KEYWORD_SRA,
+    KEYWORD_SRL,
+    KEYWORD_SUBTYPE,
+    KEYWORD_THEN,
+    KEYWORD_TO,
+    KEYWORD_TRANSPORT,
+    KEYWORD_TYPE,
+    KEYWORD_UNAFFECTED,
+    KEYWORD_UNITS,
+    KEYWORD_UNTIL,
+    KEYWORD_USE,
+    KEYWORD_VARIABLE,
+    KEYWORD_WAIT,
+    KEYWORD_WHEN,
+    KEYWORD_WHILE,
+    KEYWORD_WITH,
+    KEYWORD_XNOR,
+    KEYWORD_XOR
+} keywordId;
+
+/* Used to determine whether keyword is valid for the current language and
+ * what its ID is.
+ */
+typedef struct sKeywordDesc {
+    const char *name;
+    keywordId id;
+} keywordDesc;
+
+typedef enum eTokenType {
+    TOKEN_NONE,         /* none */
+    TOKEN_OPEN_PAREN,   /* ( */
+    TOKEN_CLOSE_PAREN,  /* ) */
+    TOKEN_COMMA,        /* the comma character */
+    TOKEN_IDENTIFIER,
+    TOKEN_KEYWORD,
+    TOKEN_PERIOD,       /* . */
+    TOKEN_OPERATOR,
+    TOKEN_SEMICOLON,    /* the semicolon character */
+    TOKEN_STRING
+} tokenType;
+
+typedef struct sTokenInfo {
+    tokenType type;
+    keywordId keyword;
+    vString *string;            /* the name of the token */
+    vString *scope;
+    unsigned long lineNumber;   /* line number of tag */
+    fpos_t filePosition;        /* file position of line containing name */
+} tokenInfo;
+
+/*
+ *   DATA DEFINITIONS
+ */
+static int Lang_vhdl;
+static jmp_buf Exception;
+
+/* Used to index into the VhdlKinds table.
*/
+typedef enum {
+    VHDLTAG_UNDEFINED = -1,
+    VHDLTAG_CONSTANT,
+    VHDLTAG_TYPE,
+    VHDLTAG_SUBTYPE,
+    VHDLTAG_RECORD,
+    VHDLTAG_ENTITY,
+    VHDLTAG_COMPONENT,
+    VHDLTAG_PROTOTYPE,
+    VHDLTAG_FUNCTION,
+    VHDLTAG_PROCEDURE,
+    VHDLTAG_PACKAGE,
+    VHDLTAG_LOCAL
+} vhdlKind;
+
+static kindOption VhdlKinds[] = {
+    {TRUE, 'c', "constant", "constant declarations"},
+    {TRUE, 't', "type", "type definitions"},
+    {TRUE, 'T', "subtype", "subtype definitions"},
+    {TRUE, 'r', "record", "record names"},
+    {TRUE, 'e', "entity", "entity declarations"},
+    {FALSE, 'C', "component", "component declarations"},
+    {FALSE, 'd', "prototype", "prototypes"},
+    {TRUE, 'f', "function", "function prototypes and declarations"},
+    {TRUE, 'p', "procedure", "procedure prototypes and declarations"},
+    {TRUE, 'P', "package", "package definitions"},
+    {FALSE, 'l', "local", "local definitions"}
+};
+
+static keywordDesc VhdlKeywordTable[] = {
+    {"abs", KEYWORD_ABS},
+    {"access", KEYWORD_ACCESS},
+    {"after", KEYWORD_AFTER},
+    {"alias", KEYWORD_ALIAS},
+    {"all", KEYWORD_ALL},
+    {"and", KEYWORD_AND},
+    {"architecture", KEYWORD_ARCHITECTURE},
+    {"array", KEYWORD_ARRAY},
+    {"assert", KEYWORD_ASSERT},
+    {"attribute", KEYWORD_ATTRIBUTE},
+    {"begin", KEYWORD_BEGIN},
+    {"block", KEYWORD_BLOCK},
+    {"body", KEYWORD_BODY},
+    {"buffer", KEYWORD_BUFFER},
+    {"bus", KEYWORD_BUS},
+    {"case", KEYWORD_CASE},
+    {"component", KEYWORD_COMPONENT},
+    {"configuration", KEYWORD_CONFIGURATION},
+    {"constant", KEYWORD_CONSTANT},
+    {"disconnect", KEYWORD_DISCONNECT},
+    {"downto", KEYWORD_DOWNTO},
+    {"else", KEYWORD_ELSE},
+    {"elsif", KEYWORD_ELSIF},
+    {"end", KEYWORD_END},
+    {"entity", KEYWORD_ENTITY},
+    {"exit", KEYWORD_EXIT},
+    {"file", KEYWORD_FILE},
+    {"for", KEYWORD_FOR},
+    {"function", KEYWORD_FUNCTION},
+    {"generate", KEYWORD_GENERATE},
+    {"generic", KEYWORD_GENERIC},
+    {"group", KEYWORD_GROUP},
+    {"guarded", KEYWORD_GUARDED},
+    {"if", KEYWORD_IF},
+    {"impure", KEYWORD_IMPURE},
+    {"in", KEYWORD_IN},
+    {"inertial", KEYWORD_INERTIAL},
+    {"inout", KEYWORD_INOUT},
+    {"is", KEYWORD_IS},
+    {"label", KEYWORD_LABEL},
+    {"library", KEYWORD_LIBRARY},
+    {"linkage", KEYWORD_LINKAGE},
+    {"literal", KEYWORD_LITERAL},
+    {"loop", KEYWORD_LOOP},
+    {"map", KEYWORD_MAP},
+    {"mod", KEYWORD_MOD},
+    {"nand", KEYWORD_NAND},
+    {"new", KEYWORD_NEW},
+    {"next", KEYWORD_NEXT},
+    {"nor", KEYWORD_NOR},
+    {"not", KEYWORD_NOT},
+    {"null", KEYWORD_NULL},
+    {"of", KEYWORD_OF},
+    {"on", KEYWORD_ON},
+    {"open", KEYWORD_OPEN},
+    {"or", KEYWORD_OR},
+    {"others", KEYWORD_OTHERS},
+    {"out", KEYWORD_OUT},
+    {"package", KEYWORD_PACKAGE},
+    {"port", KEYWORD_PORT},
+    {"postponed", KEYWORD_POSTPONED},
+    {"procedure", KEYWORD_PROCEDURE},
+    {"process", KEYWORD_PROCESS},
+    {"pure", KEYWORD_PURE},
+    {"range", KEYWORD_RANGE},
+    {"record", KEYWORD_RECORD},
+    {"register", KEYWORD_REGISTER},
+    {"reject", KEYWORD_REJECT},
+    {"return", KEYWORD_RETURN},
+    {"rol", KEYWORD_ROL},
+    {"ror", KEYWORD_ROR},
+    {"select", KEYWORD_SELECT},
+    {"severity", KEYWORD_SEVERITY},
+    {"signal", KEYWORD_SIGNAL},
+    {"shared", KEYWORD_SHARED},
+    {"sla", KEYWORD_SLA},
+    {"sli", KEYWORD_SLI},
+    {"sra", KEYWORD_SRA},
+    {"srl", KEYWORD_SRL},
+    {"subtype", KEYWORD_SUBTYPE},
+    {"then", KEYWORD_THEN},
+    {"to", KEYWORD_TO},
+    {"transport", KEYWORD_TRANSPORT},
+    {"type", KEYWORD_TYPE},
+    {"unaffected", KEYWORD_UNAFFECTED},
+    {"units", KEYWORD_UNITS},
+    {"until", KEYWORD_UNTIL},
+    {"use", KEYWORD_USE},
+    {"variable", KEYWORD_VARIABLE},
+    {"wait", KEYWORD_WAIT},
+    {"when", KEYWORD_WHEN},
+    {"while", KEYWORD_WHILE},
+    {"with", KEYWORD_WITH},
+    {"xnor", KEYWORD_XNOR},
+    {"xor", KEYWORD_XOR}
+};
+
+/*
+ *   FUNCTION DECLARATIONS
+ */
+static void parseKeywords (tokenInfo * const token, boolean local);
+
+/*
+ *   FUNCTION DEFINITIONS
+ */
+
+/* TRUE for a character that may start an identifier (letter or '_'). */
+static boolean isIdentChar1 (const int c)
+{
+    return (boolean) (isalpha (c) || c == '_');
+}
+
+/* TRUE for a character that may continue an identifier. */
+static boolean isIdentChar (const int c)
+{
+    return (boolean) (isalpha (c) || isdigit (c) || c == '_');
+}
+
+/*  TRUE when "token" is an identifier whose text equals "name", compared
+ *  without regard to case.
+ */
+static boolean isIdentifierMatch (const tokenInfo * const token,
+    const vString * const name)
+{
+    return (boolean) (isType (token, TOKEN_IDENTIFIER) &&
+        strcasecmp (vStringValue (token->string), vStringValue (name)) == 0);
+    /* XXX this is copy/paste from eiffel.c and slightly modified */
+    /* shouldn't we use strNcasecmp ? */
+}
+
+/* TRUE when "token" is the given keyword or an identifier equal to "name". */
+static boolean isKeywordOrIdent (const tokenInfo * const token,
+    const keywordId keyword, const vString * const name)
+{
+    return (boolean) (isKeyword (token, keyword) ||
+        isIdentifierMatch (token, name));
+}
+
+/*  Allocates a token with empty string/scope buffers, stamped with the
+ *  current source line and input position.  Release with deleteToken ().
+ */
+static tokenInfo *newToken (void)
+{
+    tokenInfo *const token = xMalloc (1, tokenInfo);
+    token->type = TOKEN_NONE;
+    token->keyword = KEYWORD_NONE;
+    token->string = vStringNew ();
+    token->scope = vStringNew ();
+    token->lineNumber = getSourceLineNumber ();
+    token->filePosition = getInputFilePosition ();
+    return token;
+}
+
+/* Frees a token and its two strings; a NULL token is ignored. */
+static void deleteToken (tokenInfo * const token)
+{
+    if (token != NULL)
+    {
+        vStringDelete (token->string);
+        vStringDelete (token->scope);
+        eFree (token);
+    }
+}
+
+/*
+ *   Parsing functions
+ */
+
+/*  Appends input characters to "string" until the closing "delimiter" or
+ *  end of file.  A backslash causes the following character to be taken
+ *  literally (the backslash itself is dropped).
+ */
+static void parseString (vString * const string, const int delimiter)
+{
+    boolean end = FALSE;
+    while (!end)
+    {
+        int c = fileGetc ();
+        if (c == EOF)
+            end = TRUE;
+        else if (c == '\\')
+        {
+            c = fileGetc ();    /* This may be a ' or ". */
+            if (c == EOF)
+                end = TRUE;     /* do not store EOF as a character */
+            else
+                vStringPut (string, c);
+        }
+        else if (c == delimiter)
+            end = TRUE;
+        else
+            vStringPut (string, c);
+    }
+    vStringTerminate (string);
+}
+
+/*  Read a VHDL identifier beginning with "firstChar" and place it into "name".
+*/
+/*  The character that ends the identifier is pushed back unless it is
+ *  whitespace, in which case it is consumed.
+ */
+static void parseIdentifier (vString * const string, const int firstChar)
+{
+    int c = firstChar;
+    Assert (isIdentChar1 (c));
+    do
+    {
+        vStringPut (string, c);
+        c = fileGetc ();
+    } while (isIdentChar (c));
+    vStringTerminate (string);
+    if (!isspace (c))
+        fileUngetc (c); /* unget non-identifier character */
+}
+
+/*  Reads the next token into "token", skipping blanks, tabs, newlines and
+ *  "--" comments.  Does not return at end of file: it longjmps to
+ *  Exception with ExceptionEOF instead.  Characters with no dedicated
+ *  case, and the single quote, leave the type at TOKEN_NONE.
+ */
+static void readToken (tokenInfo * const token)
+{
+    int c;
+
+    token->type = TOKEN_NONE;
+    token->keyword = KEYWORD_NONE;
+    vStringClear (token->string);
+
+  getNextChar:
+    do
+    {
+        c = fileGetc ();
+        token->lineNumber = getSourceLineNumber ();
+        token->filePosition = getInputFilePosition ();
+    }
+    while (c == '\t' || c == ' ' || c == '\n');
+
+    switch (c)
+    {
+    case EOF:
+        longjmp (Exception, (int) ExceptionEOF);
+        break;
+    case '(':
+        token->type = TOKEN_OPEN_PAREN;
+        break;
+    case ')':
+        token->type = TOKEN_CLOSE_PAREN;
+        break;
+    case ';':
+        token->type = TOKEN_SEMICOLON;
+        break;
+    case '.':
+        token->type = TOKEN_PERIOD;
+        break;
+    case ',':
+        token->type = TOKEN_COMMA;
+        break;
+    case '\'': /* only single char are inside simple quotes */
+        break; /* or it is for attributes so we don't care */
+    case '"':
+        token->type = TOKEN_STRING;
+        parseString (token->string, c);
+        token->lineNumber = getSourceLineNumber ();
+        token->filePosition = getInputFilePosition ();
+        break;
+    case '-':
+        c = fileGetc ();
+        if (c == '-') /* start of a comment */
+        {
+            fileSkipToCharacter ('\n');
+            goto getNextChar;
+        }
+        else
+        {
+            if (!isspace (c))
+                fileUngetc (c);
+            token->type = TOKEN_OPERATOR;
+        }
+        break;
+    default:
+        if (!isIdentChar1 (c))
+            token->type = TOKEN_NONE;
+        else
+        {
+            parseIdentifier (token->string, c);
+            token->lineNumber = getSourceLineNumber ();
+            token->filePosition = getInputFilePosition ();
+            token->keyword = analyzeToken (token->string, Lang_vhdl);
+            if (isKeyword (token, KEYWORD_NONE))
+                token->type = TOKEN_IDENTIFIER;
+            else
+                token->type = TOKEN_KEYWORD;
+        }
+        break;
+    }
+}
+
+/*  Discards tokens up to and including the first one carrying "keyword".
+ *  NOTE(review): readToken () longjmps at end of file, in which case the
+ *  scratch token allocated here is not released.
+ */
+static void skipToKeyword (const keywordId keyword)
+{
+    tokenInfo *const token = newToken ();
+    do
+    {
+        readToken (token);
+    }
+    while (!isKeyword (token, keyword));
+    deleteToken (token);
+}
+
+/*  If "token" is an opening parenthesis, reads through the matching
+ *  closing parenthesis and leaves "token" on the token that follows it.
+ *  Any other token type returns immediately without reading.
+ */
+static void skipToMatched (tokenInfo * const token)
+{
+    int nest_level = 0;
+    tokenType open_token;
+    tokenType close_token;
+
+    switch (token->type)
+    {
+    case TOKEN_OPEN_PAREN:
+        open_token = TOKEN_OPEN_PAREN;
+        close_token = TOKEN_CLOSE_PAREN;
+        break;
+    default:
+        return;
+    }
+
+    /*
+     * This routine will skip to a matching closing token.
+     * It will also handle nested tokens like the (, ) below.
+     *   (  name varchar(30), text binary(10)  )
+     */
+    if (isType (token, open_token))
+    {
+        nest_level++;
+        while (!(isType (token, close_token) && (nest_level == 0)))
+        {
+            readToken (token);
+            if (isType (token, open_token))
+            {
+                nest_level++;
+            }
+            if (isType (token, close_token))
+            {
+                if (nest_level > 0)
+                {
+                    nest_level--;
+                }
+            }
+        }
+        readToken (token);
+    }
+}
+
+/*  Emits a tag entry named after token->string at the token's recorded
+ *  position, provided the kind is enabled.
+ */
+static void makeConstTag (tokenInfo * const token, const vhdlKind kind)
+{
+    if (VhdlKinds[kind].enabled)
+    {
+        const char *const name = vStringValue (token->string);
+        tagEntryInfo e;
+        initTagEntry (&e, name);
+        e.lineNumber = token->lineNumber;
+        e.filePosition = token->filePosition;
+        e.kindName = VhdlKinds[kind].name;
+        e.kind = VhdlKinds[kind].letter;
+        makeTagEntry (&e);
+    }
+}
+
+/*  Like makeConstTag (), but first rewrites token->string to
+ *  "<scope>.<string>" when the token carries a non-empty scope.
+ */
+static void makeVhdlTag (tokenInfo * const token, const vhdlKind kind)
+{
+    if (VhdlKinds[kind].enabled)
+    {
+        /*
+         * If a scope has been added to the token, change the token
+         * string to include the scope when making the tag.
+         */
+        if (vStringLength (token->scope) > 0)
+        {
+            vString *fulltag = vStringNew ();
+            vStringCopy (fulltag, token->scope);
+            vStringCatS (fulltag, ".");
+            vStringCatS (fulltag, vStringValue (token->string));
+            vStringTerminate (fulltag);
+            vStringCopy (token->string, fulltag);
+            vStringDelete (fulltag);
+        }
+        makeConstTag (token, kind);
+    }
+}
+
+/*  Parser initialisation hook: records the language id and registers
+ *  every entry of VhdlKeywordTable.
+ */
+static void initialize (const langType language)
+{
+    size_t i;
+    const size_t count =
+        sizeof (VhdlKeywordTable) / sizeof (VhdlKeywordTable[0]);
+    Lang_vhdl = language;
+    for (i = 0; i < count; ++i)
+    {
+        const keywordDesc *const p = &VhdlKeywordTable[i];
+        addKeyword (p->name, language, (int) p->id);
+    }
+}
+
+/*  Called with "token" on the "package" keyword.  For "package body X"
+ *  the token after "body" is tagged as a package; for "package X" the
+ *  identifier itself is tagged.
+ */
+static void parsePackage (tokenInfo * const token)
+{
+    tokenInfo *const name = newToken ();
+    Assert (isKeyword (token, KEYWORD_PACKAGE));
+    readToken (token);
+    if (isKeyword (token, KEYWORD_BODY))
+    {
+        readToken (name);
+        makeVhdlTag (name, VHDLTAG_PACKAGE);
+    }
+    else if (isType (token, TOKEN_IDENTIFIER))
+    {
+        makeVhdlTag (token, VHDLTAG_PACKAGE);
+    }
+    deleteToken (name);
+}
+
+static void parseModule (tokenInfo * const token)
+{
+    tokenInfo *const name = newToken ();
+    const vhdlKind kind = isKeyword (token, KEYWORD_ENTITY) ?
+ VHDLTAG_ENTITY : VHDLTAG_COMPONENT; + Assert (isKeyword (token, KEYWORD_ENTITY) || + isKeyword (token, KEYWORD_COMPONENT)); + readToken (name); + if (kind == VHDLTAG_COMPONENT) + { + makeVhdlTag (name, VHDLTAG_COMPONENT); + skipToKeyword (KEYWORD_END); + fileSkipToCharacter (';'); + } + else + { + readToken (token); + if (isKeyword (token, KEYWORD_IS)) + { + makeVhdlTag (name, VHDLTAG_ENTITY); + skipToKeyword (KEYWORD_END); + fileSkipToCharacter (';'); + } + } + deleteToken (name); +} + +static void parseRecord (tokenInfo * const token) +{ + tokenInfo *const name = newToken (); + Assert (isKeyword (token, KEYWORD_RECORD)); + readToken (name); + do + { + readToken (token); /* should be a colon */ + fileSkipToCharacter (';'); + makeVhdlTag (name, VHDLTAG_RECORD); + readToken (name); + } + while (!isKeyword (name, KEYWORD_END)); + fileSkipToCharacter (';'); + deleteToken (name); +} + +static void parseTypes (tokenInfo * const token) +{ + tokenInfo *const name = newToken (); + const vhdlKind kind = isKeyword (token, KEYWORD_TYPE) ? + VHDLTAG_TYPE : VHDLTAG_SUBTYPE; + Assert (isKeyword (token, KEYWORD_TYPE) || + isKeyword (token, KEYWORD_SUBTYPE)); + readToken (name); + readToken (token); + if (isKeyword (token, KEYWORD_IS)) + { + readToken (token); /* type */ + if (isKeyword (token, KEYWORD_RECORD)) + { + makeVhdlTag (name, kind); + /*TODO: make tags of the record's names */ + parseRecord (token); + } + else + { + makeVhdlTag (name, kind); + } + } + deleteToken (name); +} + +static void parseConstant (boolean local) +{ + tokenInfo *const name = newToken (); + readToken (name); + if (local) + { + makeVhdlTag (name, VHDLTAG_LOCAL); + } + else + { + makeVhdlTag (name, VHDLTAG_CONSTANT); + } + fileSkipToCharacter (';'); + deleteToken (name); +} + +static void parseSubProgram (tokenInfo * const token) +{ + tokenInfo *const name = newToken (); + boolean endSubProgram = FALSE; + const vhdlKind kind = isKeyword (token, KEYWORD_FUNCTION) ? 
+ VHDLTAG_FUNCTION : VHDLTAG_PROCEDURE; + Assert (isKeyword (token, KEYWORD_FUNCTION) || + isKeyword (token, KEYWORD_PROCEDURE)); + readToken (name); /* the name of the function or procedure */ + readToken (token); + if (isType (token, TOKEN_OPEN_PAREN)) + { + skipToMatched (token); + } + + if (kind == VHDLTAG_FUNCTION) + { + if (isKeyword (token, KEYWORD_RETURN)) + { + /* Read datatype */ + readToken (token); + while (! isKeyword (token, KEYWORD_IS) && + ! isType (token, TOKEN_SEMICOLON)) + { + readToken (token); + } + } + } + + if (isType (token, TOKEN_SEMICOLON)) + { + makeVhdlTag (name, VHDLTAG_PROTOTYPE); + } + else if (isKeyword (token, KEYWORD_IS)) + { + if (kind == VHDLTAG_FUNCTION) + { + makeVhdlTag (name, VHDLTAG_FUNCTION); + do + { + readToken (token); + if (isKeyword (token, KEYWORD_END)) + { + readToken (token); + endSubProgram = isKeywordOrIdent (token, + KEYWORD_FUNCTION, name->string); + fileSkipToCharacter (';'); + } + else + { + parseKeywords (token, TRUE); + } + } while (!endSubProgram); + } + else + { + makeVhdlTag (name, VHDLTAG_PROCEDURE); + do + { + readToken (token); + if (isKeyword (token, KEYWORD_END)) + { + readToken (token); + endSubProgram = isKeywordOrIdent (token, + KEYWORD_PROCEDURE, name->string); + fileSkipToCharacter (';'); + } + else + { + parseKeywords (token, TRUE); + } + } while (!endSubProgram); + } + } + deleteToken (name); +} + +/* TODO */ +/* records */ +static void parseKeywords (tokenInfo * const token, boolean local) +{ + switch (token->keyword) + { + case KEYWORD_END: + fileSkipToCharacter (';'); + break; + case KEYWORD_CONSTANT: + parseConstant (local); + break; + case KEYWORD_TYPE: + parseTypes (token); + break; + case KEYWORD_SUBTYPE: + parseTypes (token); + break; + case KEYWORD_ENTITY: + parseModule (token); + break; + case KEYWORD_COMPONENT: + parseModule (token); + break; + case KEYWORD_FUNCTION: + parseSubProgram (token); + break; + case KEYWORD_PROCEDURE: + parseSubProgram (token); + break; + case 
KEYWORD_PACKAGE: + parsePackage (token); + break; + default: + break; + } +} + +static void parseVhdlFile (tokenInfo * const token) +{ + do + { + readToken (token); + parseKeywords (token, FALSE); + } while (!isKeyword (token, KEYWORD_END)); +} + +static void findVhdlTags (void) +{ + tokenInfo *const token = newToken (); + exception_t exception = (exception_t) (setjmp (Exception)); + + while (exception == ExceptionNone) + parseVhdlFile (token); + + deleteToken (token); +} + +extern parserDefinition *VhdlParser (void) +{ + static const char *const extensions[] = { "vhdl", "vhd", NULL }; + parserDefinition *def = parserNew ("VHDL"); + def->kinds = VhdlKinds; + def->kindCount = KIND_COUNT (VhdlKinds); + def->extensions = extensions; + def->parser = findVhdlTags; + def->initialize = initialize; + return def; +} + +/* vi:set tabstop=4 shiftwidth=4 noet: */ diff --git a/vim.c b/vim.c new file mode 100644 index 0000000..951ee5f --- /dev/null +++ b/vim.c @@ -0,0 +1,636 @@ +/* +* $Id: vim.c 485 2006-10-24 12:06:19Z dfishburn $ +* +* Copyright (c) 2000-2003, Darren Hiebert +* +* This source code is released for free distribution under the terms of the +* GNU General Public License. +* +* Thanks are due to Jay Glanville for significant improvements. +* +* This module contains functions for generating tags for user-defined +* functions for the Vim editor. 
+*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include +#include +#ifdef DEBUG +#include +#endif + + +#include "parse.h" +#include "read.h" +#include "vstring.h" + +#if 0 +typedef struct sLineInfo { + tokenType type; + keywordId keyword; + vString * string; + vString * scope; + unsigned long lineNumber; + fpos_t filePosition; +} lineInfo; +#endif + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_AUGROUP, + K_COMMAND, + K_FUNCTION, + K_MAP, + K_VARIABLE +} vimKind; + +static kindOption VimKinds [] = { + { TRUE, 'a', "augroup", "autocommand groups" }, + { TRUE, 'c', "command", "user-defined commands" }, + { TRUE, 'f', "function", "function definitions" }, + { TRUE, 'm', "map", "maps" }, + { TRUE, 'v', "variable", "variable definitions" }, +}; + +/* + * DATA DECLARATIONS + */ + +#if 0 +typedef enum eException { + ExceptionNone, ExceptionEOF +} exception_t; +#endif + +/* + * DATA DEFINITIONS + */ + +#if 0 +static jmp_buf Exception; +#endif + +/* + * FUNCTION DEFINITIONS + */ + +/* This function takes a char pointer, tries to find a scope separator in the + * string, and if it does, returns a pointer to the character after the colon, + * and the character defining the scope. + * If a colon is not found, it returns the original pointer. 
+ */ +static const unsigned char* skipPrefix (const unsigned char* name, int *scope) +{ + const unsigned char* result = name; + int counter; + size_t length; + length = strlen((const char*)name); + if (scope != NULL) + *scope = '\0'; + if (length > 3 && name[1] == ':') + { + if (scope != NULL) + *scope = *name; + result = name + 2; + } + else if (length > 5 && strncasecmp ((const char*) name, "", (size_t) 5) == 0) + { + if (scope != NULL) + *scope = *name; + result = name + 5; + } + else + { + /* + * Vim7 check for dictionaries or autoload function names + */ + counter = 0; + do + { + switch ( name[counter] ) + { + case '.': + /* Set the scope to d - Dictionary */ + *scope = 'd'; + break; + case '#': + /* Set the scope to a - autoload */ + *scope = 'a'; + break; + } + ++counter; + } while (isalnum ((int) name[counter]) || + name[counter] == '_' || + name[counter] == '.' || + name[counter] == '#' + ); + } + return result; +} + +static boolean isMap (const unsigned char* line) +{ + /* + * There are many different short cuts for specifying a map. + * This routine should capture all the permutations. 
+ */ + if ( + strncmp ((const char*) line, "map", (size_t) 3) == 0 || + strncmp ((const char*) line, "nm", (size_t) 2) == 0 || + strncmp ((const char*) line, "nma", (size_t) 3) == 0 || + strncmp ((const char*) line, "nmap", (size_t) 4) == 0 || + strncmp ((const char*) line, "vm", (size_t) 2) == 0 || + strncmp ((const char*) line, "vma", (size_t) 3) == 0 || + strncmp ((const char*) line, "vmap", (size_t) 4) == 0 || + strncmp ((const char*) line, "om", (size_t) 2) == 0 || + strncmp ((const char*) line, "oma", (size_t) 3) == 0 || + strncmp ((const char*) line, "omap", (size_t) 4) == 0 || + strncmp ((const char*) line, "im", (size_t) 2) == 0 || + strncmp ((const char*) line, "ima", (size_t) 3) == 0 || + strncmp ((const char*) line, "imap", (size_t) 4) == 0 || + strncmp ((const char*) line, "lm", (size_t) 2) == 0 || + strncmp ((const char*) line, "lma", (size_t) 3) == 0 || + strncmp ((const char*) line, "lmap", (size_t) 4) == 0 || + strncmp ((const char*) line, "cm", (size_t) 2) == 0 || + strncmp ((const char*) line, "cma", (size_t) 3) == 0 || + strncmp ((const char*) line, "cmap", (size_t) 4) == 0 || + strncmp ((const char*) line, "no", (size_t) 2) == 0 || + strncmp ((const char*) line, "nor", (size_t) 3) == 0 || + strncmp ((const char*) line, "nore", (size_t) 4) == 0 || + strncmp ((const char*) line, "norem", (size_t) 5) == 0 || + strncmp ((const char*) line, "norema", (size_t) 6) == 0 || + strncmp ((const char*) line, "noremap", (size_t) 7) == 0 || + strncmp ((const char*) line, "nno", (size_t) 3) == 0 || + strncmp ((const char*) line, "nnor", (size_t) 4) == 0 || + strncmp ((const char*) line, "nnore", (size_t) 5) == 0 || + strncmp ((const char*) line, "nnorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "nnorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "nnoremap", (size_t) 8) == 0 || + strncmp ((const char*) line, "vno", (size_t) 3) == 0 || + strncmp ((const char*) line, "vnor", (size_t) 4) == 0 || + strncmp ((const char*) line, "vnore", (size_t) 
5) == 0 || + strncmp ((const char*) line, "vnorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "vnorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "vnoremap", (size_t) 8) == 0 || + strncmp ((const char*) line, "ono", (size_t) 3) == 0 || + strncmp ((const char*) line, "onor", (size_t) 4) == 0 || + strncmp ((const char*) line, "onore", (size_t) 5) == 0 || + strncmp ((const char*) line, "onorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "onorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "onoremap", (size_t) 8) == 0 || + strncmp ((const char*) line, "ino", (size_t) 3) == 0 || + strncmp ((const char*) line, "inor", (size_t) 4) == 0 || + strncmp ((const char*) line, "inore", (size_t) 5) == 0 || + strncmp ((const char*) line, "inorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "inorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "inoremap", (size_t) 8) == 0 || + strncmp ((const char*) line, "lno", (size_t) 3) == 0 || + strncmp ((const char*) line, "lnor", (size_t) 4) == 0 || + strncmp ((const char*) line, "lnore", (size_t) 5) == 0 || + strncmp ((const char*) line, "lnorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "lnorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "lnoremap", (size_t) 8) == 0 || + strncmp ((const char*) line, "cno", (size_t) 3) == 0 || + strncmp ((const char*) line, "cnor", (size_t) 4) == 0 || + strncmp ((const char*) line, "cnore", (size_t) 5) == 0 || + strncmp ((const char*) line, "cnorem", (size_t) 6) == 0 || + strncmp ((const char*) line, "cnorema", (size_t) 7) == 0 || + strncmp ((const char*) line, "cnoremap", (size_t) 8) == 0 + ) + return TRUE; + + return FALSE; +} + +static const unsigned char * readVimLine (void) +{ + const unsigned char *line; + + while ((line = fileReadLine ()) != NULL) + { + while (isspace ((int) *line)) + ++line; + + if ((int) *line == '"') + continue; /* skip comment */ + + break; + } + + return line; +} + +static void parseFunction (const unsigned 
char *line) +{ + vString *name = vStringNew (); + /* boolean inFunction = FALSE; */ + int scope; + + const unsigned char *cp = line + 1; + + if ((int) *++cp == 'n' && (int) *++cp == 'c' && + (int) *++cp == 't' && (int) *++cp == 'i' && + (int) *++cp == 'o' && (int) *++cp == 'n') + ++cp; + if ((int) *cp == '!') + ++cp; + if (isspace ((int) *cp)) + { + while (*cp && isspace ((int) *cp)) + ++cp; + + if (*cp) + { + cp = skipPrefix (cp, &scope); + if (isupper ((int) *cp) || + scope == 's' || /* script scope */ + scope == '<' || /* script scope */ + scope == 'd' || /* dictionary */ + scope == 'a') /* autoload */ + { + do + { + vStringPut (name, (int) *cp); + ++cp; + } while (isalnum ((int) *cp) || *cp == '_' || *cp == '.' || *cp == '#'); + vStringTerminate (name); + makeSimpleTag (name, VimKinds, K_FUNCTION); + vStringClear (name); + } + } + } + + /* TODO - update struct to indicate inside function */ + while ((line = readVimLine ()) != NULL) + { + /* + * Vim7 added the for/endfo[r] construct, so we must first + * check for an "endfo", before a "endf" + */ + if ( (!strncmp ((const char*) line, "endfo", (size_t) 5) == 0) && + (strncmp ((const char*) line, "endf", (size_t) 4) == 0) ) + break; + /* TODO - call parseVimLine */ + } + vStringDelete (name); +} + +static void parseAutogroup (const unsigned char *line) +{ + vString *name = vStringNew (); + + /* Found Autocommand Group (augroup) */ + const unsigned char *cp = line + 2; + if ((int) *++cp == 'r' && (int) *++cp == 'o' && + (int) *++cp == 'u' && (int) *++cp == 'p') + ++cp; + if (isspace ((int) *cp)) + { + while (*cp && isspace ((int) *cp)) + ++cp; + + if (*cp) + { + if (strncasecmp ((const char*) cp, "end", (size_t) 3) != 0) + { + do + { + vStringPut (name, (int) *cp); + ++cp; + } while (isalnum ((int) *cp) || *cp == '_'); + vStringTerminate (name); + makeSimpleTag (name, VimKinds, K_AUGROUP); + vStringClear (name); + } + } + } + vStringDelete (name); +} + +static boolean parseCommand (const unsigned char *line) +{ + 
vString *name = vStringNew (); + boolean cmdProcessed = TRUE; + + /* + * Found a user-defined command + * + * They can have many options preceeded by a dash + * command! -nargs=+ -complete Select :call s:DB_execSql("select " . ) + * The name of the command should be the first word not preceeded by a dash + * + */ + const unsigned char *cp = line; + + if ( (int) *cp == '\\' ) + { + /* + * We are recursively calling this function is the command + * has been continued on to the next line + * + * Vim statements can be continued onto a newline using a \ + * to indicate the previous line is continuing. + * + * com -nargs=1 -bang -complete=customlist,EditFileComplete + * \ EditFile edit + * + * If the following lines do not have a line continuation + * the command must not be spanning multiple lines and should + * be synatically incorrect. + */ + if ((int) *cp == '\\') + ++cp; + + while (*cp && isspace ((int) *cp)) + ++cp; + } + else if ( (!strncmp ((const char*) line, "comp", (size_t) 4) == 0) && + (!strncmp ((const char*) line, "comc", (size_t) 4) == 0) && + (strncmp ((const char*) line, "com", (size_t) 3) == 0) ) + { + cp += 2; + if ((int) *++cp == 'm' && (int) *++cp == 'a' && + (int) *++cp == 'n' && (int) *++cp == 'd') + ++cp; + + if ((int) *cp == '!') + ++cp; + + while (*cp && isspace ((int) *cp)) + ++cp; + } + else + { + /* + * We are recursively calling this function. If it does not start + * with "com" or a line continuation character, we have moved off + * the command line and should let the other routines parse this file. + */ + cmdProcessed = FALSE; + goto cleanUp; + } + + /* + * Strip off any spaces and options which are part of the command. + * These should preceed the command name. + */ + do + { + if (isspace ((int) *cp)) + { + ++cp; + } + else if (*cp == '-') + { + /* + * Read until the next space which sparates options or the name + */ + while (*cp && !isspace ((int) *cp)) + ++cp; + } + } while ( *cp && !isalnum ((int) *cp) ); + + if ( ! 
*cp ) + { + /* + * We have reached the end of the line without finding the command name. + * Read the next line and continue processing it as a command. + */ + line = readVimLine(); + parseCommand(line); + goto cleanUp; + } + + do + { + vStringPut (name, (int) *cp); + ++cp; + } while (isalnum ((int) *cp) || *cp == '_'); + + vStringTerminate (name); + makeSimpleTag (name, VimKinds, K_COMMAND); + vStringClear (name); + +cleanUp: + vStringDelete (name); + + return cmdProcessed; +} + +static void parseLet (const unsigned char *line) +{ + vString *name = vStringNew (); + + /* we've found a variable declared outside of a function!! */ + const unsigned char *cp = line + 3; + const unsigned char *np = line; + /* get the name */ + if (isspace ((int) *cp)) + { + while (*cp && isspace ((int) *cp)) + ++cp; + + /* + * Ignore lets which set: + * & - local buffer vim settings + * @ - registers + * [ - Lists or Dictionaries + */ + if (!*cp || *cp == '&' || *cp == '@' || *cp == '[' ) + goto cleanUp; + + /* + * Ignore vim variables which are read only + * v: - Vim variables. 
+ */ + np = cp; + ++np; + if ((int) *cp == 'v' && (int) *np == ':' ) + goto cleanUp; + + /* deal with spaces, $, @ and & */ + while (*cp && *cp != '$' && !isalnum ((int) *cp)) + ++cp; + + if (!*cp) + goto cleanUp; + + /* cp = skipPrefix (cp, &scope); */ + do + { + if (!*cp) + break; + + vStringPut (name, (int) *cp); + ++cp; + } while (isalnum ((int) *cp) || *cp == '_' || *cp == '#' || *cp == ':' || *cp == '$'); + vStringTerminate (name); + makeSimpleTag (name, VimKinds, K_VARIABLE); + vStringClear (name); + } + +cleanUp: + vStringDelete (name); +} + +static boolean parseMap (const unsigned char *line) +{ + vString *name = vStringNew (); + + const unsigned char *cp = line; + + /* Remove map */ + while (*cp && isalnum ((int) *cp)) + ++cp; + + if ((int) *cp == '!') + ++cp; + + /* + * Maps follow this basic format + * map + * nnoremap :Tlist + * map scdt GetColumnDataType + * inoremap ,,, diwi<pa>pa>kA + * inoremap ( =PreviewFunctionSignature() + * + * The Vim help shows the various special arguments available to a map: + * 1.2 SPECIAL ARGUMENTS *:map-arguments* + * + * + *