1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
5 -- S Y S T E M . R E G E X P --
9 -- Copyright (C) 1999-2012, AdaCore --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
30 ------------------------------------------------------------------------------
32 with Ada.Unchecked_Deallocation;
34 with System.Case_Util;
36 package body System.Regexp is
38 Open_Paren : constant Character := '(';
39 Close_Paren : constant Character := ')';
40 Open_Bracket : constant Character := '[';
41 Close_Bracket : constant Character := ']';
43 type State_Index is new Natural;
44 type Column_Index is new Natural;
46 type Regexp_Array is array
47 (State_Index range <>, Column_Index range <>) of State_Index;
48 -- First index is for the state number
49 -- Second index is for the character type
50 -- Contents is the new State
52 type Regexp_Array_Access is access Regexp_Array;
53 -- Use this type through the functions Set below, so that it
54 -- can grow dynamically depending on the needs.
56 type Mapping is array (Character'Range) of Column_Index;
57 -- Mapping between characters and column in the Regexp_Array
59 type Boolean_Array is array (State_Index range <>) of Boolean;
62 (Alphabet_Size : Column_Index;
63 Num_States : State_Index) is
66 States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
67 Is_Final : Boolean_Array (1 .. Num_States);
68 Case_Sensitive : Boolean;
70 -- Deterministic finite-state machine
72 -----------------------
73 -- Local Subprograms --
74 -----------------------
77 (Table : in out Regexp_Array_Access;
79 Column : Column_Index;
81 -- Sets a value in the table. If the table is too small, reallocate it
82 -- dynamically so that (State, Column) is a valid index in it.
85 (Table : Regexp_Array_Access;
87 Column : Column_Index)
89 -- Returns the value in the table at (State, Column).
90 -- If this index does not exist in the table, returns 0
92 procedure Free is new Ada.Unchecked_Deallocation
93 (Regexp_Array, Regexp_Array_Access);
99 procedure Adjust (R : in out Regexp) is
104 Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
105 Num_States => R.R.Num_States);
117 Glob : Boolean := False;
118 Case_Sensitive : Boolean := True)
121 S : String := Pattern;
122 -- The pattern which is really compiled (when the pattern is case
123 -- insensitive, we convert this string to lower-cases
125 Map : Mapping := (others => 0);
126 -- Mapping between characters and columns in the tables
128 Alphabet_Size : Column_Index := 0;
129 -- Number of significant characters in the regular expression.
130 -- This total does not include special operators, such as *, (, ...
132 procedure Check_Well_Formed_Pattern;
133 -- Check that the pattern to compile is well-formed, so that subsequent
134 -- code can rely on this without performing each time the checks to
135 -- avoid accessing the pattern outside its bounds. However, not all
136 -- well-formedness rules are checked. In particular, rules about special
137 -- characters not being treated as regular characters are not checked.
139 procedure Create_Mapping;
140 -- Creates a mapping between characters in the regexp and columns
141 -- in the tables representing the regexp. Test that the regexp is
142 -- well-formed Modifies Alphabet_Size and Map
144 procedure Create_Primary_Table
145 (Table : out Regexp_Array_Access;
146 Num_States : out State_Index;
147 Start_State : out State_Index;
148 End_State : out State_Index);
149 -- Creates the first version of the regexp (this is a non deterministic
150 -- finite state machine, which is unadapted for a fast pattern
151 -- matching algorithm). We use a recursive algorithm to process the
152 -- parenthesis sub-expressions.
154 -- Table : at the end of the procedure : Column 0 is for any character
155 -- ('.') and the last columns are for no character (closure)
156 -- Num_States is set to the number of states in the table
157 -- Start_State is the number of the starting state in the regexp
158 -- End_State is the number of the final state when the regexp matches
160 procedure Create_Primary_Table_Glob
161 (Table : out Regexp_Array_Access;
162 Num_States : out State_Index;
163 Start_State : out State_Index;
164 End_State : out State_Index);
165 -- Same function as above, but it deals with the second possible
166 -- grammar for 'globbing pattern', which is a kind of subset of the
167 -- whole regular expression grammar.
169 function Create_Secondary_Table
170 (First_Table : Regexp_Array_Access;
171 Num_States : State_Index;
172 Start_State : State_Index;
173 End_State : State_Index)
175 -- Creates the definitive table representing the regular expression
176 -- This is actually a transformation of the primary table First_Table,
177 -- where every state is grouped with the states in its 'no-character'
178 -- columns. The transitions between the new states are then recalculated
179 -- and if necessary some new states are created.
181 -- Note that the resulting finite-state machine is not optimized in
182 -- terms of the number of states : it would be more time-consuming to
183 -- add a third pass to reduce the number of states in the machine, with
184 -- no speed improvement...
186 procedure Raise_Exception (M : String; Index : Integer);
187 pragma No_Return (Raise_Exception);
188 -- Raise an exception, indicating an error at character Index in S
190 -------------------------------
191 -- Check_Well_Formed_Pattern --
192 -------------------------------
194 procedure Check_Well_Formed_Pattern is
197 Past_Elmt : Boolean := False;
198 -- Set to True everywhere an elmt has been parsed, if Glob=False,
199 -- meaning there can be now an occurrence of '*', '+' and '?'.
201 Past_Term : Boolean := False;
202 -- Set to True everywhere a term has been parsed, if Glob=False,
203 -- meaning there can be now an occurrence of '|'.
205 Parenthesis_Level : Integer := 0;
206 Curly_Level : Integer := 0;
208 Last_Open : Integer := S'First - 1;
209 -- The last occurrence of an opening parenthesis, if Glob=False,
210 -- or the last occurrence of an opening curly brace, if Glob=True.
212 procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
213 -- If no more characters are raised, call Raise_Exception
215 --------------------------------------
216 -- Raise_Exception_If_No_More_Chars --
217 --------------------------------------
219 procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is
221 if J + K > S'Last then
222 Raise_Exception ("Ill-formed pattern while parsing", J);
224 end Raise_Exception_If_No_More_Chars;
226 -- Start of processing for Check_Well_Formed_Pattern
230 while J <= S'Last loop
234 Raise_Exception_If_No_More_Chars;
239 Raise_Exception_If_No_More_Chars;
243 -- The first character never has a special meaning
245 if S (J) = ']' or else S (J) = '-' then
247 Raise_Exception_If_No_More_Chars;
250 -- The set of characters cannot be empty
254 ("Set of characters cannot be empty in regular "
259 Possible_Range_Start : Boolean := True;
260 -- Set True everywhere a range character '-' can occur
264 exit when S (J) = Close_Bracket;
266 -- The current character should be followed by a
269 Raise_Exception_If_No_More_Chars (1);
272 and then S (J + 1) /= Close_Bracket
274 if not Possible_Range_Start then
276 ("No mix of ranges is allowed in "
277 & "regular expression", J);
281 Raise_Exception_If_No_More_Chars;
283 -- Range cannot be followed by '-' character,
284 -- except as last character in the set.
286 Possible_Range_Start := False;
289 Possible_Range_Start := True;
294 Raise_Exception_If_No_More_Chars;
301 -- A closing bracket can end an elmt or term
306 when Close_Bracket =>
308 -- A close bracket must follow a open_bracket, and cannot be
309 -- found alone on the line.
312 ("Incorrect character ']' in regular expression", J);
318 -- Any character can be an elmt or a term
324 -- \ not allowed at the end of the regexp
327 ("Incorrect character '\' in regular expression", J);
332 Parenthesis_Level := Parenthesis_Level + 1;
335 -- An open parenthesis does not end an elmt or term
343 Parenthesis_Level := Parenthesis_Level - 1;
345 if Parenthesis_Level < 0 then
347 ("')' is not associated with '(' in regular "
351 if J = Last_Open + 1 then
353 ("Empty parentheses not allowed in regular "
357 if not Past_Term then
359 ("Closing parenthesis not allowed here in regular "
363 -- A closing parenthesis can end an elmt or term
371 Curly_Level := Curly_Level + 1;
375 -- Any character can be an elmt or a term
381 -- No need to check for ',' as the code always accepts them
385 Curly_Level := Curly_Level - 1;
387 if Curly_Level < 0 then
389 ("'}' is not associated with '{' in regular "
393 if J = Last_Open + 1 then
395 ("Empty curly braces not allowed in regular "
400 -- Any character can be an elmt or a term
406 when '*' | '?' | '+' =>
409 -- These operators must apply to an elmt sub-expression,
410 -- and cannot be found if one has not just been parsed.
412 if not Past_Elmt then
414 ("'*', '+' and '?' operators must be "
415 & "applied to an element in regular expression", J);
425 -- This operator must apply to a term sub-expression,
426 -- and cannot be found if one has not just been parsed.
428 if not Past_Term then
430 ("'|' operator must be "
431 & "applied to a term in regular expression", J);
441 -- Any character can be an elmt or a term
451 -- A closing parenthesis must follow an open parenthesis
453 if Parenthesis_Level /= 0 then
455 ("'(' must always be associated with a ')'", J);
458 -- A closing curly brace must follow an open curly brace
460 if Curly_Level /= 0 then
462 ("'{' must always be associated with a '}'", J);
464 end Check_Well_Formed_Pattern;
470 procedure Create_Mapping is
472 procedure Add_In_Map (C : Character);
473 -- Add a character in the mapping, if it is not already defined
479 procedure Add_In_Map (C : Character) is
482 Alphabet_Size := Alphabet_Size + 1;
483 Map (C) := Alphabet_Size;
487 J : Integer := S'First;
488 Parenthesis_Level : Integer := 0;
489 Curly_Level : Integer := 0;
490 Last_Open : Integer := S'First - 1;
492 -- Start of processing for Create_Mapping
495 while J <= S'Last loop
504 if S (J) = ']' or else S (J) = '-' then
508 -- The first character never has a special meaning
513 ("Ran out of characters while parsing ", J);
516 exit when S (J) = Close_Bracket;
519 and then S (J + 1) /= Close_Bracket
522 Start : constant Integer := J - 1;
531 for Char in S (Start) .. S (J) loop
546 -- A close bracket must follow a open_bracket,
547 -- and cannot be found alone on the line
549 when Close_Bracket =>
551 ("Incorrect character ']' in regular expression", J);
559 -- \ not allowed at the end of the regexp
562 ("Incorrect character '\' in regular expression", J);
567 Parenthesis_Level := Parenthesis_Level + 1;
570 Add_In_Map (Open_Paren);
575 Parenthesis_Level := Parenthesis_Level - 1;
577 if Parenthesis_Level < 0 then
579 ("')' is not associated with '(' in regular "
583 if J = Last_Open + 1 then
585 ("Empty parenthesis not allowed in regular "
590 Add_In_Map (Close_Paren);
602 Curly_Level := Curly_Level + 1;
609 Curly_Level := Curly_Level - 1;
616 ("'*', '+', '?' and '|' operators cannot be in "
617 & "first position in regular expression", J);
625 -- These operators must apply to a sub-expression,
626 -- and cannot be found at the beginning of the line
629 ("'*', '+', '?' and '|' operators cannot be in "
630 & "first position in regular expression", J);
644 -- A closing parenthesis must follow an open parenthesis
646 if Parenthesis_Level /= 0 then
648 ("'(' must always be associated with a ')'", J);
651 if Curly_Level /= 0 then
653 ("'{' must always be associated with a '}'", J);
657 --------------------------
658 -- Create_Primary_Table --
659 --------------------------
661 procedure Create_Primary_Table
662 (Table : out Regexp_Array_Access;
663 Num_States : out State_Index;
664 Start_State : out State_Index;
665 End_State : out State_Index)
667 Empty_Char : constant Column_Index := Alphabet_Size + 1;
669 Current_State : State_Index := 0;
670 -- Index of the last created state
672 procedure Add_Empty_Char
673 (State : State_Index;
674 To_State : State_Index);
675 -- Add a empty-character transition from State to To_State
677 procedure Create_Repetition
678 (Repetition : Character;
679 Start_Prev : State_Index;
680 End_Prev : State_Index;
681 New_Start : out State_Index;
682 New_End : in out State_Index);
683 -- Create the table in case we have a '*', '+' or '?'.
684 -- Start_Prev .. End_Prev should indicate respectively the start and
685 -- end index of the previous expression, to which '*', '+' or '?' is
688 procedure Create_Simple
689 (Start_Index : Integer;
691 Start_State : out State_Index;
692 End_State : out State_Index);
693 -- Fill the table for the regexp Simple.
694 -- This is the recursive procedure called to handle () expressions
695 -- If End_State = 0, then the call to Create_Simple creates an
696 -- independent regexp, not a concatenation
697 -- Start_Index .. End_Index is the starting index in the string S.
699 -- Warning: it may look like we are creating too many empty-string
700 -- transitions, but they are needed to get the correct regexp.
701 -- The table is filled as follow ( s means start-state, e means
704 -- regexp state_num | a b * empty_string
705 -- ------- ------------------------------
709 -- ab 1 (s) | 2 - - -
726 -- (a) 1 (s) | 2 - - -
742 function Next_Sub_Expression
743 (Start_Index : Integer;
746 -- Returns the index of the last character of the next sub-expression
747 -- in Simple. Index cannot be greater than End_Index.
753 procedure Add_Empty_Char
754 (State : State_Index;
755 To_State : State_Index)
757 J : Column_Index := Empty_Char;
760 while Get (Table, State, J) /= 0 loop
764 Set (Table, State, J, To_State);
767 -----------------------
768 -- Create_Repetition --
769 -----------------------
771 procedure Create_Repetition
772 (Repetition : Character;
773 Start_Prev : State_Index;
774 End_Prev : State_Index;
775 New_Start : out State_Index;
776 New_End : in out State_Index)
779 New_Start := Current_State + 1;
782 Add_Empty_Char (New_End, New_Start);
785 Current_State := Current_State + 2;
786 New_End := Current_State;
788 Add_Empty_Char (End_Prev, New_End);
789 Add_Empty_Char (New_Start, Start_Prev);
791 if Repetition /= '+' then
792 Add_Empty_Char (New_Start, New_End);
795 if Repetition /= '?' then
796 Add_Empty_Char (New_End, New_Start);
798 end Create_Repetition;
804 procedure Create_Simple
805 (Start_Index : Integer;
807 Start_State : out State_Index;
808 End_State : out State_Index)
810 J : Integer := Start_Index;
811 Last_Start : State_Index := 0;
816 while J <= End_Index loop
820 J_Start : constant Integer := J + 1;
821 Next_Start : State_Index;
822 Next_End : State_Index;
825 J := Next_Sub_Expression (J, End_Index);
826 Create_Simple (J_Start, J - 1, Next_Start, Next_End);
829 and then (S (J + 1) = '*' or else
830 S (J + 1) = '+' or else
842 Last_Start := Next_Start;
844 if End_State /= 0 then
845 Add_Empty_Char (End_State, Last_Start);
848 End_State := Next_End;
854 Start_Prev : constant State_Index := Start_State;
855 End_Prev : constant State_Index := End_State;
856 Start_J : constant Integer := J + 1;
857 Start_Next : State_Index := 0;
858 End_Next : State_Index := 0;
861 J := Next_Sub_Expression (J, End_Index);
863 -- Create a new state for the start of the alternative
865 Current_State := Current_State + 1;
866 Last_Start := Current_State;
867 Start_State := Last_Start;
869 -- Create the tree for the second part of alternative
871 Create_Simple (Start_J, J, Start_Next, End_Next);
873 -- Create the end state
875 Add_Empty_Char (Last_Start, Start_Next);
876 Add_Empty_Char (Last_Start, Start_Prev);
877 Current_State := Current_State + 1;
878 End_State := Current_State;
879 Add_Empty_Char (End_Prev, End_State);
880 Add_Empty_Char (End_Next, End_State);
884 Current_State := Current_State + 1;
887 Next_State : State_Index := Current_State + 1;
897 for Column in 0 .. Alphabet_Size loop
898 Set (Table, Current_State, Column,
899 Value => Current_State + 1);
903 -- Automatically add the first character
905 if S (J) = '-' or else S (J) = ']' then
906 Set (Table, Current_State, Map (S (J)),
907 Value => Next_State);
911 -- Loop till closing bracket found
914 exit when S (J) = Close_Bracket;
917 and then S (J + 1) /= ']'
920 Start : constant Integer := J - 1;
929 for Char in S (Start) .. S (J) loop
930 Set (Table, Current_State, Map (Char),
931 Value => Next_State);
940 Set (Table, Current_State, Map (S (J)),
941 Value => Next_State);
947 Current_State := Current_State + 1;
949 -- If the next symbol is a special symbol
952 and then (S (J + 1) = '*' or else
953 S (J + 1) = '+' or else
965 Last_Start := Current_State - 1;
967 if End_State /= 0 then
968 Add_Empty_Char (End_State, Last_Start);
971 End_State := Current_State;
974 when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
976 ("Incorrect character in regular expression :", J);
979 Current_State := Current_State + 1;
981 -- Create the state for the symbol S (J)
984 for K in 0 .. Alphabet_Size loop
985 Set (Table, Current_State, K,
986 Value => Current_State + 1);
994 Set (Table, Current_State, Map (S (J)),
995 Value => Current_State + 1);
998 Current_State := Current_State + 1;
1000 -- If the next symbol is a special symbol
1003 and then (S (J + 1) = '*' or else
1004 S (J + 1) = '+' or else
1016 Last_Start := Current_State - 1;
1018 if End_State /= 0 then
1019 Add_Empty_Char (End_State, Last_Start);
1022 End_State := Current_State;
1027 if Start_State = 0 then
1028 Start_State := Last_Start;
1035 -------------------------
1036 -- Next_Sub_Expression --
1037 -------------------------
1039 function Next_Sub_Expression
1040 (Start_Index : Integer;
1041 End_Index : Integer)
1044 J : Integer := Start_Index;
1045 Start_On_Alter : Boolean := False;
1049 Start_On_Alter := True;
1053 exit when J = End_Index;
1060 when Open_Bracket =>
1063 exit when S (J) = Close_Bracket;
1071 J := Next_Sub_Expression (J, End_Index);
1077 if Start_On_Alter then
1087 end Next_Sub_Expression;
1089 -- Start of Create_Primary_Table
1092 Table.all := (others => (others => 0));
1093 Create_Simple (S'First, S'Last, Start_State, End_State);
1094 Num_States := Current_State;
1095 end Create_Primary_Table;
1097 -------------------------------
1098 -- Create_Primary_Table_Glob --
1099 -------------------------------
1101 procedure Create_Primary_Table_Glob
1102 (Table : out Regexp_Array_Access;
1103 Num_States : out State_Index;
1104 Start_State : out State_Index;
1105 End_State : out State_Index)
1107 Empty_Char : constant Column_Index := Alphabet_Size + 1;
1109 Current_State : State_Index := 0;
1110 -- Index of the last created state
1112 procedure Add_Empty_Char
1113 (State : State_Index;
1114 To_State : State_Index);
1115 -- Add a empty-character transition from State to To_State
1117 procedure Create_Simple
1118 (Start_Index : Integer;
1119 End_Index : Integer;
1120 Start_State : out State_Index;
1121 End_State : out State_Index);
1122 -- Fill the table for the S (Start_Index .. End_Index).
1123 -- This is the recursive procedure called to handle () expressions
1125 --------------------
1126 -- Add_Empty_Char --
1127 --------------------
1129 procedure Add_Empty_Char
1130 (State : State_Index;
1131 To_State : State_Index)
1133 J : Column_Index := Empty_Char;
1136 while Get (Table, State, J) /= 0 loop
1140 Set (Table, State, J,
1148 procedure Create_Simple
1149 (Start_Index : Integer;
1150 End_Index : Integer;
1151 Start_State : out State_Index;
1152 End_State : out State_Index)
1154 J : Integer := Start_Index;
1155 Last_Start : State_Index := 0;
1161 while J <= End_Index loop
1164 when Open_Bracket =>
1165 Current_State := Current_State + 1;
1168 Next_State : State_Index := Current_State + 1;
1177 for Column in 0 .. Alphabet_Size loop
1178 Set (Table, Current_State, Column,
1179 Value => Current_State + 1);
1183 -- Automatically add the first character
1185 if S (J) = '-' or else S (J) = ']' then
1186 Set (Table, Current_State, Map (S (J)),
1187 Value => Current_State);
1191 -- Loop till closing bracket found
1194 exit when S (J) = Close_Bracket;
1197 and then S (J + 1) /= ']'
1200 Start : constant Integer := J - 1;
1208 for Char in S (Start) .. S (J) loop
1209 Set (Table, Current_State, Map (Char),
1210 Value => Next_State);
1219 Set (Table, Current_State, Map (S (J)),
1220 Value => Next_State);
1226 Last_Start := Current_State;
1227 Current_State := Current_State + 1;
1229 if End_State /= 0 then
1230 Add_Empty_Char (End_State, Last_Start);
1233 End_State := Current_State;
1238 Start_Regexp_Sub : State_Index;
1239 End_Regexp_Sub : State_Index;
1240 Create_Start : State_Index := 0;
1242 Create_End : State_Index := 0;
1243 -- Initialized to avoid junk warning
1246 while S (J) /= '}' loop
1248 -- First step : find sub pattern
1251 while S (End_Sub) /= ','
1252 and then S (End_Sub) /= '}'
1254 End_Sub := End_Sub + 1;
1257 -- Second step : create a sub pattern
1267 -- Third step : create an alternative
1269 if Create_Start = 0 then
1270 Current_State := Current_State + 1;
1271 Create_Start := Current_State;
1272 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1273 Current_State := Current_State + 1;
1274 Create_End := Current_State;
1275 Add_Empty_Char (End_Regexp_Sub, Create_End);
1278 Current_State := Current_State + 1;
1279 Add_Empty_Char (Current_State, Create_Start);
1280 Create_Start := Current_State;
1281 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1282 Add_Empty_Char (End_Regexp_Sub, Create_End);
1286 if End_State /= 0 then
1287 Add_Empty_Char (End_State, Create_Start);
1290 End_State := Create_End;
1291 Last_Start := Create_Start;
1295 Current_State := Current_State + 1;
1297 if End_State /= 0 then
1298 Add_Empty_Char (End_State, Current_State);
1301 Add_Empty_Char (Current_State, Current_State + 1);
1302 Add_Empty_Char (Current_State, Current_State + 3);
1303 Last_Start := Current_State;
1305 Current_State := Current_State + 1;
1307 for K in 0 .. Alphabet_Size loop
1308 Set (Table, Current_State, K,
1309 Value => Current_State + 1);
1312 Current_State := Current_State + 1;
1313 Add_Empty_Char (Current_State, Current_State + 1);
1315 Current_State := Current_State + 1;
1316 Add_Empty_Char (Current_State, Last_Start);
1317 End_State := Current_State;
1320 Current_State := Current_State + 1;
1323 for K in 0 .. Alphabet_Size loop
1324 Set (Table, Current_State, K,
1325 Value => Current_State + 1);
1333 -- Create the state for the symbol S (J)
1335 Set (Table, Current_State, Map (S (J)),
1336 Value => Current_State + 1);
1339 Last_Start := Current_State;
1340 Current_State := Current_State + 1;
1342 if End_State /= 0 then
1343 Add_Empty_Char (End_State, Last_Start);
1346 End_State := Current_State;
1350 if Start_State = 0 then
1351 Start_State := Last_Start;
1358 -- Start of processing for Create_Primary_Table_Glob
1361 Table.all := (others => (others => 0));
1362 Create_Simple (S'First, S'Last, Start_State, End_State);
1363 Num_States := Current_State;
1364 end Create_Primary_Table_Glob;
1366 ----------------------------
1367 -- Create_Secondary_Table --
1368 ----------------------------
1370 function Create_Secondary_Table
1371 (First_Table : Regexp_Array_Access;
1372 Num_States : State_Index;
1373 Start_State : State_Index;
1374 End_State : State_Index) return Regexp
1376 pragma Warnings (Off, Num_States);
1378 Last_Index : constant State_Index := First_Table'Last (1);
1379 type Meta_State is array (1 .. Last_Index) of Boolean;
1381 Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
1382 (others => (others => 0));
1384 Meta_States : array (1 .. Last_Index + 1) of Meta_State :=
1385 (others => (others => False));
1387 Temp_State_Not_Null : Boolean;
1389 Is_Final : Boolean_Array (1 .. Last_Index) := (others => False);
1391 Current_State : State_Index := 1;
1392 Nb_State : State_Index := 1;
1395 (State : in out Meta_State;
1396 Item : State_Index);
1397 -- Compute the closure of the state (that is every other state which
1398 -- has a empty-character transition) and add it to the state
1405 (State : in out Meta_State;
1409 if State (Item) then
1413 State (Item) := True;
1415 for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
1416 if First_Table (Item, Column) = 0 then
1420 Closure (State, First_Table (Item, Column));
1424 -- Start of processing for Create_Secondary_Table
1427 -- Create a new state
1429 Closure (Meta_States (Current_State), Start_State);
1431 while Current_State <= Nb_State loop
1433 -- If this new meta-state includes the primary table end state,
1434 -- then this meta-state will be a final state in the regexp
1436 if Meta_States (Current_State)(End_State) then
1437 Is_Final (Current_State) := True;
1440 -- For every character in the regexp, calculate the possible
1441 -- transitions from Current_State
1443 for Column in 0 .. Alphabet_Size loop
1444 Meta_States (Nb_State + 1) := (others => False);
1445 Temp_State_Not_Null := False;
1447 for K in Meta_States (Current_State)'Range loop
1448 if Meta_States (Current_State)(K)
1449 and then First_Table (K, Column) /= 0
1452 (Meta_States (Nb_State + 1), First_Table (K, Column));
1453 Temp_State_Not_Null := True;
1457 -- If at least one transition existed
1459 if Temp_State_Not_Null then
1461 -- Check if this new state corresponds to an old one
1463 for K in 1 .. Nb_State loop
1464 if Meta_States (K) = Meta_States (Nb_State + 1) then
1465 Table (Current_State, Column) := K;
1470 -- If not, create a new state
1472 if Table (Current_State, Column) = 0 then
1473 Nb_State := Nb_State + 1;
1474 Table (Current_State, Column) := Nb_State;
1479 Current_State := Current_State + 1;
1482 -- Returns the regexp
1488 R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
1489 Num_States => Nb_State);
1491 R.Is_Final := Is_Final (1 .. Nb_State);
1492 R.Case_Sensitive := Case_Sensitive;
1494 for State in 1 .. Nb_State loop
1495 for K in 0 .. Alphabet_Size loop
1496 R.States (State, K) := Table (State, K);
1500 return (Ada.Finalization.Controlled with R => R);
1502 end Create_Secondary_Table;
1504 ---------------------
1505 -- Raise_Exception --
1506 ---------------------
1508 procedure Raise_Exception (M : String; Index : Integer) is
1510 raise Error_In_Regexp with M & " at offset" & Index'Img;
1511 end Raise_Exception;
1513 -- Start of processing for Compile
1516 -- Special case for the empty string: it always matches, and the
1517 -- following processing would fail on it.
1519 return (Ada.Finalization.Controlled with
1520 R => new Regexp_Value'
1521 (Alphabet_Size => 0,
1523 Map => (others => 0),
1524 States => (others => (others => 1)),
1525 Is_Final => (others => True),
1526 Case_Sensitive => True));
1529 if not Case_Sensitive then
1530 System.Case_Util.To_Lower (S);
1533 -- Check the pattern is well-formed before any treatment
1535 Check_Well_Formed_Pattern;
1539 -- Creates the primary table
1542 Table : Regexp_Array_Access;
1543 Num_States : State_Index;
1544 Start_State : State_Index;
1545 End_State : State_Index;
1549 Table := new Regexp_Array (1 .. 100,
1550 0 .. Alphabet_Size + 10);
1552 Create_Primary_Table (Table, Num_States, Start_State, End_State);
1554 Create_Primary_Table_Glob
1555 (Table, Num_States, Start_State, End_State);
1558 -- Creates the secondary table
1560 R := Create_Secondary_Table
1561 (Table, Num_States, Start_State, End_State);
1571 procedure Finalize (R : in out Regexp) is
1572 procedure Free is new
1573 Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1584 (Table : Regexp_Array_Access;
1585 State : State_Index;
1586 Column : Column_Index) return State_Index
1589 if State <= Table'Last (1)
1590 and then Column <= Table'Last (2)
1592 return Table (State, Column);
1602 function Match (S : String; R : Regexp) return Boolean is
1603 Current_State : State_Index := 1;
1607 raise Constraint_Error;
1610 for Char in S'Range loop
1612 if R.R.Case_Sensitive then
1613 Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1616 R.R.States (Current_State,
1617 R.R.Map (System.Case_Util.To_Lower (S (Char))));
1620 if Current_State = 0 then
1626 return R.R.Is_Final (Current_State);
1634 (Table : in out Regexp_Array_Access;
1635 State : State_Index;
1636 Column : Column_Index;
1637 Value : State_Index)
1639 New_Lines : State_Index;
1640 New_Columns : Column_Index;
1641 New_Table : Regexp_Array_Access;
1644 if State <= Table'Last (1)
1645 and then Column <= Table'Last (2)
1647 Table (State, Column) := Value;
1649 -- Doubles the size of the table until it is big enough that
1650 -- (State, Column) is a valid index
1652 New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1653 New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1654 New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1655 Table'First (2) .. New_Columns);
1656 New_Table.all := (others => (others => 0));
1658 for J in Table'Range (1) loop
1659 for K in Table'Range (2) loop
1660 New_Table (J, K) := Table (J, K);
1666 Table (State, Column) := Value;