From b9de1157289455b0ca26daff519d4a0ddcd1fa13 Mon Sep 17 00:00:00 2001 From: Dan Albert Date: Wed, 24 Feb 2016 13:48:45 -0800 Subject: Update 4.8.1 to 4.8.3. My previous drop was the wrong version. The platform mingw is currently using 4.8.3, not 4.8.1 (not sure how I got that wrong). From ftp://ftp.gnu.org/gnu/gcc/gcc-4.8.3/gcc-4.8.3.tar.bz2. Bug: http://b/26523949 Change-Id: Id85f1bdcbbaf78c7d0b5a69e74c798a08f341c35 --- gcc-4.8.3/gcc/ada/s-regexp.adb | 1670 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1670 insertions(+) create mode 100644 gcc-4.8.3/gcc/ada/s-regexp.adb (limited to 'gcc-4.8.3/gcc/ada/s-regexp.adb') diff --git a/gcc-4.8.3/gcc/ada/s-regexp.adb b/gcc-4.8.3/gcc/ada/s-regexp.adb new file mode 100644 index 000000000..56c38a8a5 --- /dev/null +++ b/gcc-4.8.3/gcc/ada/s-regexp.adb @@ -0,0 +1,1670 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT COMPILER COMPONENTS -- +-- -- +-- S Y S T E M . R E G E X P -- +-- -- +-- B o d y -- +-- -- +-- Copyright (C) 1999-2012, AdaCore -- +-- -- +-- GNAT is free software; you can redistribute it and/or modify it under -- +-- terms of the GNU General Public License as published by the Free Soft- -- +-- ware Foundation; either version 3, or (at your option) any later ver- -- +-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- +-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- +-- or FITNESS FOR A PARTICULAR PURPOSE. -- +-- -- +-- As a special exception under Section 7 of GPL version 3, you are granted -- +-- additional permissions described in the GCC Runtime Library Exception, -- +-- version 3.1, as published by the Free Software Foundation. -- +-- -- +-- You should have received a copy of the GNU General Public License and -- +-- a copy of the GCC Runtime Library Exception along with this program; -- +-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- +-- . -- +-- -- +-- GNAT was originally developed by the GNAT team at New York University. -- +-- Extensive contributions were provided by Ada Core Technologies Inc. -- +-- -- +------------------------------------------------------------------------------ + +with Ada.Unchecked_Deallocation; + +with System.Case_Util; + +package body System.Regexp is + + Open_Paren : constant Character := '('; + Close_Paren : constant Character := ')'; + Open_Bracket : constant Character := '['; + Close_Bracket : constant Character := ']'; + + type State_Index is new Natural; + type Column_Index is new Natural; + + type Regexp_Array is array + (State_Index range <>, Column_Index range <>) of State_Index; + -- First index is for the state number + -- Second index is for the character type + -- Contents is the new State + + type Regexp_Array_Access is access Regexp_Array; + -- Use this type through the functions Set below, so that it + -- can grow dynamically depending on the needs. + + type Mapping is array (Character'Range) of Column_Index; + -- Mapping between characters and column in the Regexp_Array + + type Boolean_Array is array (State_Index range <>) of Boolean; + + type Regexp_Value + (Alphabet_Size : Column_Index; + Num_States : State_Index) is + record + Map : Mapping; + States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size); + Is_Final : Boolean_Array (1 .. Num_States); + Case_Sensitive : Boolean; + end record; + -- Deterministic finite-state machine + + ----------------------- + -- Local Subprograms -- + ----------------------- + + procedure Set + (Table : in out Regexp_Array_Access; + State : State_Index; + Column : Column_Index; + Value : State_Index); + -- Sets a value in the table. If the table is too small, reallocate it + -- dynamically so that (State, Column) is a valid index in it. + + function Get + (Table : Regexp_Array_Access; + State : State_Index; + Column : Column_Index) + return State_Index; + -- Returns the value in the table at (State, Column). + -- If this index does not exist in the table, returns 0 + + procedure Free is new Ada.Unchecked_Deallocation + (Regexp_Array, Regexp_Array_Access); + + ------------ + -- Adjust -- + ------------ + + procedure Adjust (R : in out Regexp) is + Tmp : Regexp_Access; + + begin + if R.R /= null then + Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size, + Num_States => R.R.Num_States); + Tmp.all := R.R.all; + R.R := Tmp; + end if; + end Adjust; + + ------------- + -- Compile -- + ------------- + + function Compile + (Pattern : String; + Glob : Boolean := False; + Case_Sensitive : Boolean := True) + return Regexp + is + S : String := Pattern; + -- The pattern which is really compiled (when the pattern is case + -- insensitive, we convert this string to lower-cases + + Map : Mapping := (others => 0); + -- Mapping between characters and columns in the tables + + Alphabet_Size : Column_Index := 0; + -- Number of significant characters in the regular expression. + -- This total does not include special operators, such as *, (, ... + + procedure Check_Well_Formed_Pattern; + -- Check that the pattern to compile is well-formed, so that subsequent + -- code can rely on this without performing each time the checks to + -- avoid accessing the pattern outside its bounds. However, not all + -- well-formedness rules are checked. In particular, rules about special + -- characters not being treated as regular characters are not checked. + + procedure Create_Mapping; + -- Creates a mapping between characters in the regexp and columns + -- in the tables representing the regexp. Test that the regexp is + -- well-formed Modifies Alphabet_Size and Map + + procedure Create_Primary_Table + (Table : out Regexp_Array_Access; + Num_States : out State_Index; + Start_State : out State_Index; + End_State : out State_Index); + -- Creates the first version of the regexp (this is a non deterministic + -- finite state machine, which is unadapted for a fast pattern + -- matching algorithm). We use a recursive algorithm to process the + -- parenthesis sub-expressions. + -- + -- Table : at the end of the procedure : Column 0 is for any character + -- ('.') and the last columns are for no character (closure) + -- Num_States is set to the number of states in the table + -- Start_State is the number of the starting state in the regexp + -- End_State is the number of the final state when the regexp matches + + procedure Create_Primary_Table_Glob + (Table : out Regexp_Array_Access; + Num_States : out State_Index; + Start_State : out State_Index; + End_State : out State_Index); + -- Same function as above, but it deals with the second possible + -- grammar for 'globbing pattern', which is a kind of subset of the + -- whole regular expression grammar. + + function Create_Secondary_Table + (First_Table : Regexp_Array_Access; + Num_States : State_Index; + Start_State : State_Index; + End_State : State_Index) + return Regexp; + -- Creates the definitive table representing the regular expression + -- This is actually a transformation of the primary table First_Table, + -- where every state is grouped with the states in its 'no-character' + -- columns. The transitions between the new states are then recalculated + -- and if necessary some new states are created. + -- + -- Note that the resulting finite-state machine is not optimized in + -- terms of the number of states : it would be more time-consuming to + -- add a third pass to reduce the number of states in the machine, with + -- no speed improvement... + + procedure Raise_Exception (M : String; Index : Integer); + pragma No_Return (Raise_Exception); + -- Raise an exception, indicating an error at character Index in S + + ------------------------------- + -- Check_Well_Formed_Pattern -- + ------------------------------- + + procedure Check_Well_Formed_Pattern is + J : Integer; + + Past_Elmt : Boolean := False; + -- Set to True everywhere an elmt has been parsed, if Glob=False, + -- meaning there can be now an occurrence of '*', '+' and '?'. + + Past_Term : Boolean := False; + -- Set to True everywhere a term has been parsed, if Glob=False, + -- meaning there can be now an occurrence of '|'. + + Parenthesis_Level : Integer := 0; + Curly_Level : Integer := 0; + + Last_Open : Integer := S'First - 1; + -- The last occurrence of an opening parenthesis, if Glob=False, + -- or the last occurrence of an opening curly brace, if Glob=True. + + procedure Raise_Exception_If_No_More_Chars (K : Integer := 0); + -- If no more characters are raised, call Raise_Exception + + -------------------------------------- + -- Raise_Exception_If_No_More_Chars -- + -------------------------------------- + + procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is + begin + if J + K > S'Last then + Raise_Exception ("Ill-formed pattern while parsing", J); + end if; + end Raise_Exception_If_No_More_Chars; + + -- Start of processing for Check_Well_Formed_Pattern + + begin + J := S'First; + while J <= S'Last loop + case S (J) is + when Open_Bracket => + J := J + 1; + Raise_Exception_If_No_More_Chars; + + if not Glob then + if S (J) = '^' then + J := J + 1; + Raise_Exception_If_No_More_Chars; + end if; + end if; + + -- The first character never has a special meaning + + if S (J) = ']' or else S (J) = '-' then + J := J + 1; + Raise_Exception_If_No_More_Chars; + end if; + + -- The set of characters cannot be empty + + if S (J) = ']' then + Raise_Exception + ("Set of characters cannot be empty in regular " + & "expression", J); + end if; + + declare + Possible_Range_Start : Boolean := True; + -- Set True everywhere a range character '-' can occur + + begin + loop + exit when S (J) = Close_Bracket; + + -- The current character should be followed by a + -- closing bracket. + + Raise_Exception_If_No_More_Chars (1); + + if S (J) = '-' + and then S (J + 1) /= Close_Bracket + then + if not Possible_Range_Start then + Raise_Exception + ("No mix of ranges is allowed in " + & "regular expression", J); + end if; + + J := J + 1; + Raise_Exception_If_No_More_Chars; + + -- Range cannot be followed by '-' character, + -- except as last character in the set. + + Possible_Range_Start := False; + + else + Possible_Range_Start := True; + end if; + + if S (J) = '\' then + J := J + 1; + Raise_Exception_If_No_More_Chars; + end if; + + J := J + 1; + end loop; + end; + + -- A closing bracket can end an elmt or term + + Past_Elmt := True; + Past_Term := True; + + when Close_Bracket => + + -- A close bracket must follow a open_bracket, and cannot be + -- found alone on the line. + + Raise_Exception + ("Incorrect character ']' in regular expression", J); + + when '\' => + if J < S'Last then + J := J + 1; + + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + + else + -- \ not allowed at the end of the regexp + + Raise_Exception + ("Incorrect character '\' in regular expression", J); + end if; + + when Open_Paren => + if not Glob then + Parenthesis_Level := Parenthesis_Level + 1; + Last_Open := J; + + -- An open parenthesis does not end an elmt or term + + Past_Elmt := False; + Past_Term := False; + end if; + + when Close_Paren => + if not Glob then + Parenthesis_Level := Parenthesis_Level - 1; + + if Parenthesis_Level < 0 then + Raise_Exception + ("')' is not associated with '(' in regular " + & "expression", J); + end if; + + if J = Last_Open + 1 then + Raise_Exception + ("Empty parentheses not allowed in regular " + & "expression", J); + end if; + + if not Past_Term then + Raise_Exception + ("Closing parenthesis not allowed here in regular " + & "expression", J); + end if; + + -- A closing parenthesis can end an elmt or term + + Past_Elmt := True; + Past_Term := True; + end if; + + when '{' => + if Glob then + Curly_Level := Curly_Level + 1; + Last_Open := J; + + else + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + end if; + + -- No need to check for ',' as the code always accepts them + + when '}' => + if Glob then + Curly_Level := Curly_Level - 1; + + if Curly_Level < 0 then + Raise_Exception + ("'}' is not associated with '{' in regular " + & "expression", J); + end if; + + if J = Last_Open + 1 then + Raise_Exception + ("Empty curly braces not allowed in regular " + & "expression", J); + end if; + + else + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + end if; + + when '*' | '?' | '+' => + if not Glob then + + -- These operators must apply to an elmt sub-expression, + -- and cannot be found if one has not just been parsed. + + if not Past_Elmt then + Raise_Exception + ("'*', '+' and '?' operators must be " + & "applied to an element in regular expression", J); + end if; + + Past_Elmt := False; + Past_Term := True; + end if; + + when '|' => + if not Glob then + + -- This operator must apply to a term sub-expression, + -- and cannot be found if one has not just been parsed. + + if not Past_Term then + Raise_Exception + ("'|' operator must be " + & "applied to a term in regular expression", J); + end if; + + Past_Elmt := False; + Past_Term := False; + end if; + + when others => + if not Glob then + + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + end if; + end case; + + J := J + 1; + end loop; + + -- A closing parenthesis must follow an open parenthesis + + if Parenthesis_Level /= 0 then + Raise_Exception + ("'(' must always be associated with a ')'", J); + end if; + + -- A closing curly brace must follow an open curly brace + + if Curly_Level /= 0 then + Raise_Exception + ("'{' must always be associated with a '}'", J); + end if; + end Check_Well_Formed_Pattern; + + -------------------- + -- Create_Mapping -- + -------------------- + + procedure Create_Mapping is + + procedure Add_In_Map (C : Character); + -- Add a character in the mapping, if it is not already defined + + ---------------- + -- Add_In_Map -- + ---------------- + + procedure Add_In_Map (C : Character) is + begin + if Map (C) = 0 then + Alphabet_Size := Alphabet_Size + 1; + Map (C) := Alphabet_Size; + end if; + end Add_In_Map; + + J : Integer := S'First; + Parenthesis_Level : Integer := 0; + Curly_Level : Integer := 0; + Last_Open : Integer := S'First - 1; + + -- Start of processing for Create_Mapping + + begin + while J <= S'Last loop + case S (J) is + when Open_Bracket => + J := J + 1; + + if S (J) = '^' then + J := J + 1; + end if; + + if S (J) = ']' or else S (J) = '-' then + J := J + 1; + end if; + + -- The first character never has a special meaning + + loop + if J > S'Last then + Raise_Exception + ("Ran out of characters while parsing ", J); + end if; + + exit when S (J) = Close_Bracket; + + if S (J) = '-' + and then S (J + 1) /= Close_Bracket + then + declare + Start : constant Integer := J - 1; + + begin + J := J + 1; + + if S (J) = '\' then + J := J + 1; + end if; + + for Char in S (Start) .. S (J) loop + Add_In_Map (Char); + end loop; + end; + else + if S (J) = '\' then + J := J + 1; + end if; + + Add_In_Map (S (J)); + end if; + + J := J + 1; + end loop; + + -- A close bracket must follow a open_bracket, + -- and cannot be found alone on the line + + when Close_Bracket => + Raise_Exception + ("Incorrect character ']' in regular expression", J); + + when '\' => + if J < S'Last then + J := J + 1; + Add_In_Map (S (J)); + + else + -- \ not allowed at the end of the regexp + + Raise_Exception + ("Incorrect character '\' in regular expression", J); + end if; + + when Open_Paren => + if not Glob then + Parenthesis_Level := Parenthesis_Level + 1; + Last_Open := J; + else + Add_In_Map (Open_Paren); + end if; + + when Close_Paren => + if not Glob then + Parenthesis_Level := Parenthesis_Level - 1; + + if Parenthesis_Level < 0 then + Raise_Exception + ("')' is not associated with '(' in regular " + & "expression", J); + end if; + + if J = Last_Open + 1 then + Raise_Exception + ("Empty parenthesis not allowed in regular " + & "expression", J); + end if; + + else + Add_In_Map (Close_Paren); + end if; + + when '.' => + if Glob then + Add_In_Map ('.'); + end if; + + when '{' => + if not Glob then + Add_In_Map (S (J)); + else + Curly_Level := Curly_Level + 1; + end if; + + when '}' => + if not Glob then + Add_In_Map (S (J)); + else + Curly_Level := Curly_Level - 1; + end if; + + when '*' | '?' => + if not Glob then + if J = S'First then + Raise_Exception + ("'*', '+', '?' and '|' operators cannot be in " + & "first position in regular expression", J); + end if; + end if; + + when '|' | '+' => + if not Glob then + if J = S'First then + + -- These operators must apply to a sub-expression, + -- and cannot be found at the beginning of the line + + Raise_Exception + ("'*', '+', '?' and '|' operators cannot be in " + & "first position in regular expression", J); + end if; + + else + Add_In_Map (S (J)); + end if; + + when others => + Add_In_Map (S (J)); + end case; + + J := J + 1; + end loop; + + -- A closing parenthesis must follow an open parenthesis + + if Parenthesis_Level /= 0 then + Raise_Exception + ("'(' must always be associated with a ')'", J); + end if; + + if Curly_Level /= 0 then + Raise_Exception + ("'{' must always be associated with a '}'", J); + end if; + end Create_Mapping; + + -------------------------- + -- Create_Primary_Table -- + -------------------------- + + procedure Create_Primary_Table + (Table : out Regexp_Array_Access; + Num_States : out State_Index; + Start_State : out State_Index; + End_State : out State_Index) + is + Empty_Char : constant Column_Index := Alphabet_Size + 1; + + Current_State : State_Index := 0; + -- Index of the last created state + + procedure Add_Empty_Char + (State : State_Index; + To_State : State_Index); + -- Add a empty-character transition from State to To_State + + procedure Create_Repetition + (Repetition : Character; + Start_Prev : State_Index; + End_Prev : State_Index; + New_Start : out State_Index; + New_End : in out State_Index); + -- Create the table in case we have a '*', '+' or '?'. + -- Start_Prev .. End_Prev should indicate respectively the start and + -- end index of the previous expression, to which '*', '+' or '?' is + -- applied. + + procedure Create_Simple + (Start_Index : Integer; + End_Index : Integer; + Start_State : out State_Index; + End_State : out State_Index); + -- Fill the table for the regexp Simple. + -- This is the recursive procedure called to handle () expressions + -- If End_State = 0, then the call to Create_Simple creates an + -- independent regexp, not a concatenation + -- Start_Index .. End_Index is the starting index in the string S. + -- + -- Warning: it may look like we are creating too many empty-string + -- transitions, but they are needed to get the correct regexp. + -- The table is filled as follow ( s means start-state, e means + -- end-state) : + -- + -- regexp state_num | a b * empty_string + -- ------- ------------------------------ + -- a 1 (s) | 2 - - - + -- 2 (e) | - - - - + -- + -- ab 1 (s) | 2 - - - + -- 2 | - - - 3 + -- 3 | - 4 - - + -- 4 (e) | - - - - + -- + -- a|b 1 | 2 - - - + -- 2 | - - - 6 + -- 3 | - 4 - - + -- 4 | - - - 6 + -- 5 (s) | - - - 1,3 + -- 6 (e) | - - - - + -- + -- a* 1 | 2 - - - + -- 2 | - - - 4 + -- 3 (s) | - - - 1,4 + -- 4 (e) | - - - 3 + -- + -- (a) 1 (s) | 2 - - - + -- 2 (e) | - - - - + -- + -- a+ 1 | 2 - - - + -- 2 | - - - 4 + -- 3 (s) | - - - 1 + -- 4 (e) | - - - 3 + -- + -- a? 1 | 2 - - - + -- 2 | - - - 4 + -- 3 (s) | - - - 1,4 + -- 4 (e) | - - - - + -- + -- . 1 (s) | 2 2 2 - + -- 2 (e) | - - - - + + function Next_Sub_Expression + (Start_Index : Integer; + End_Index : Integer) + return Integer; + -- Returns the index of the last character of the next sub-expression + -- in Simple. Index cannot be greater than End_Index. + + -------------------- + -- Add_Empty_Char -- + -------------------- + + procedure Add_Empty_Char + (State : State_Index; + To_State : State_Index) + is + J : Column_Index := Empty_Char; + + begin + while Get (Table, State, J) /= 0 loop + J := J + 1; + end loop; + + Set (Table, State, J, To_State); + end Add_Empty_Char; + + ----------------------- + -- Create_Repetition -- + ----------------------- + + procedure Create_Repetition + (Repetition : Character; + Start_Prev : State_Index; + End_Prev : State_Index; + New_Start : out State_Index; + New_End : in out State_Index) + is + begin + New_Start := Current_State + 1; + + if New_End /= 0 then + Add_Empty_Char (New_End, New_Start); + end if; + + Current_State := Current_State + 2; + New_End := Current_State; + + Add_Empty_Char (End_Prev, New_End); + Add_Empty_Char (New_Start, Start_Prev); + + if Repetition /= '+' then + Add_Empty_Char (New_Start, New_End); + end if; + + if Repetition /= '?' then + Add_Empty_Char (New_End, New_Start); + end if; + end Create_Repetition; + + ------------------- + -- Create_Simple -- + ------------------- + + procedure Create_Simple + (Start_Index : Integer; + End_Index : Integer; + Start_State : out State_Index; + End_State : out State_Index) + is + J : Integer := Start_Index; + Last_Start : State_Index := 0; + + begin + Start_State := 0; + End_State := 0; + while J <= End_Index loop + case S (J) is + when Open_Paren => + declare + J_Start : constant Integer := J + 1; + Next_Start : State_Index; + Next_End : State_Index; + + begin + J := Next_Sub_Expression (J, End_Index); + Create_Simple (J_Start, J - 1, Next_Start, Next_End); + + if J < End_Index + and then (S (J + 1) = '*' or else + S (J + 1) = '+' or else + S (J + 1) = '?') + then + J := J + 1; + Create_Repetition + (S (J), + Next_Start, + Next_End, + Last_Start, + End_State); + + else + Last_Start := Next_Start; + + if End_State /= 0 then + Add_Empty_Char (End_State, Last_Start); + end if; + + End_State := Next_End; + end if; + end; + + when '|' => + declare + Start_Prev : constant State_Index := Start_State; + End_Prev : constant State_Index := End_State; + Start_J : constant Integer := J + 1; + Start_Next : State_Index := 0; + End_Next : State_Index := 0; + + begin + J := Next_Sub_Expression (J, End_Index); + + -- Create a new state for the start of the alternative + + Current_State := Current_State + 1; + Last_Start := Current_State; + Start_State := Last_Start; + + -- Create the tree for the second part of alternative + + Create_Simple (Start_J, J, Start_Next, End_Next); + + -- Create the end state + + Add_Empty_Char (Last_Start, Start_Next); + Add_Empty_Char (Last_Start, Start_Prev); + Current_State := Current_State + 1; + End_State := Current_State; + Add_Empty_Char (End_Prev, End_State); + Add_Empty_Char (End_Next, End_State); + end; + + when Open_Bracket => + Current_State := Current_State + 1; + + declare + Next_State : State_Index := Current_State + 1; + + begin + J := J + 1; + + if S (J) = '^' then + J := J + 1; + + Next_State := 0; + + for Column in 0 .. Alphabet_Size loop + Set (Table, Current_State, Column, + Value => Current_State + 1); + end loop; + end if; + + -- Automatically add the first character + + if S (J) = '-' or else S (J) = ']' then + Set (Table, Current_State, Map (S (J)), + Value => Next_State); + J := J + 1; + end if; + + -- Loop till closing bracket found + + loop + exit when S (J) = Close_Bracket; + + if S (J) = '-' + and then S (J + 1) /= ']' + then + declare + Start : constant Integer := J - 1; + + begin + J := J + 1; + + if S (J) = '\' then + J := J + 1; + end if; + + for Char in S (Start) .. S (J) loop + Set (Table, Current_State, Map (Char), + Value => Next_State); + end loop; + end; + + else + if S (J) = '\' then + J := J + 1; + end if; + + Set (Table, Current_State, Map (S (J)), + Value => Next_State); + end if; + J := J + 1; + end loop; + end; + + Current_State := Current_State + 1; + + -- If the next symbol is a special symbol + + if J < End_Index + and then (S (J + 1) = '*' or else + S (J + 1) = '+' or else + S (J + 1) = '?') + then + J := J + 1; + Create_Repetition + (S (J), + Current_State - 1, + Current_State, + Last_Start, + End_State); + + else + Last_Start := Current_State - 1; + + if End_State /= 0 then + Add_Empty_Char (End_State, Last_Start); + end if; + + End_State := Current_State; + end if; + + when '*' | '+' | '?' | Close_Paren | Close_Bracket => + Raise_Exception + ("Incorrect character in regular expression :", J); + + when others => + Current_State := Current_State + 1; + + -- Create the state for the symbol S (J) + + if S (J) = '.' then + for K in 0 .. Alphabet_Size loop + Set (Table, Current_State, K, + Value => Current_State + 1); + end loop; + + else + if S (J) = '\' then + J := J + 1; + end if; + + Set (Table, Current_State, Map (S (J)), + Value => Current_State + 1); + end if; + + Current_State := Current_State + 1; + + -- If the next symbol is a special symbol + + if J < End_Index + and then (S (J + 1) = '*' or else + S (J + 1) = '+' or else + S (J + 1) = '?') + then + J := J + 1; + Create_Repetition + (S (J), + Current_State - 1, + Current_State, + Last_Start, + End_State); + + else + Last_Start := Current_State - 1; + + if End_State /= 0 then + Add_Empty_Char (End_State, Last_Start); + end if; + + End_State := Current_State; + end if; + + end case; + + if Start_State = 0 then + Start_State := Last_Start; + end if; + + J := J + 1; + end loop; + end Create_Simple; + + ------------------------- + -- Next_Sub_Expression -- + ------------------------- + + function Next_Sub_Expression + (Start_Index : Integer; + End_Index : Integer) + return Integer + is + J : Integer := Start_Index; + Start_On_Alter : Boolean := False; + + begin + if S (J) = '|' then + Start_On_Alter := True; + end if; + + loop + exit when J = End_Index; + J := J + 1; + + case S (J) is + when '\' => + J := J + 1; + + when Open_Bracket => + loop + J := J + 1; + exit when S (J) = Close_Bracket; + + if S (J) = '\' then + J := J + 1; + end if; + end loop; + + when Open_Paren => + J := Next_Sub_Expression (J, End_Index); + + when Close_Paren => + return J; + + when '|' => + if Start_On_Alter then + return J - 1; + end if; + + when others => + null; + end case; + end loop; + + return J; + end Next_Sub_Expression; + + -- Start of Create_Primary_Table + + begin + Table.all := (others => (others => 0)); + Create_Simple (S'First, S'Last, Start_State, End_State); + Num_States := Current_State; + end Create_Primary_Table; + + ------------------------------- + -- Create_Primary_Table_Glob -- + ------------------------------- + + procedure Create_Primary_Table_Glob + (Table : out Regexp_Array_Access; + Num_States : out State_Index; + Start_State : out State_Index; + End_State : out State_Index) + is + Empty_Char : constant Column_Index := Alphabet_Size + 1; + + Current_State : State_Index := 0; + -- Index of the last created state + + procedure Add_Empty_Char + (State : State_Index; + To_State : State_Index); + -- Add a empty-character transition from State to To_State + + procedure Create_Simple + (Start_Index : Integer; + End_Index : Integer; + Start_State : out State_Index; + End_State : out State_Index); + -- Fill the table for the S (Start_Index .. End_Index). + -- This is the recursive procedure called to handle () expressions + + -------------------- + -- Add_Empty_Char -- + -------------------- + + procedure Add_Empty_Char + (State : State_Index; + To_State : State_Index) + is + J : Column_Index := Empty_Char; + + begin + while Get (Table, State, J) /= 0 loop + J := J + 1; + end loop; + + Set (Table, State, J, + Value => To_State); + end Add_Empty_Char; + + ------------------- + -- Create_Simple -- + ------------------- + + procedure Create_Simple + (Start_Index : Integer; + End_Index : Integer; + Start_State : out State_Index; + End_State : out State_Index) + is + J : Integer := Start_Index; + Last_Start : State_Index := 0; + + begin + Start_State := 0; + End_State := 0; + + while J <= End_Index loop + case S (J) is + + when Open_Bracket => + Current_State := Current_State + 1; + + declare + Next_State : State_Index := Current_State + 1; + + begin + J := J + 1; + + if S (J) = '^' then + J := J + 1; + Next_State := 0; + + for Column in 0 .. Alphabet_Size loop + Set (Table, Current_State, Column, + Value => Current_State + 1); + end loop; + end if; + + -- Automatically add the first character + + if S (J) = '-' or else S (J) = ']' then + Set (Table, Current_State, Map (S (J)), + Value => Current_State); + J := J + 1; + end if; + + -- Loop till closing bracket found + + loop + exit when S (J) = Close_Bracket; + + if S (J) = '-' + and then S (J + 1) /= ']' + then + declare + Start : constant Integer := J - 1; + begin + J := J + 1; + + if S (J) = '\' then + J := J + 1; + end if; + + for Char in S (Start) .. S (J) loop + Set (Table, Current_State, Map (Char), + Value => Next_State); + end loop; + end; + + else + if S (J) = '\' then + J := J + 1; + end if; + + Set (Table, Current_State, Map (S (J)), + Value => Next_State); + end if; + J := J + 1; + end loop; + end; + + Last_Start := Current_State; + Current_State := Current_State + 1; + + if End_State /= 0 then + Add_Empty_Char (End_State, Last_Start); + end if; + + End_State := Current_State; + + when '{' => + declare + End_Sub : Integer; + Start_Regexp_Sub : State_Index; + End_Regexp_Sub : State_Index; + Create_Start : State_Index := 0; + + Create_End : State_Index := 0; + -- Initialized to avoid junk warning + + begin + while S (J) /= '}' loop + + -- First step : find sub pattern + + End_Sub := J + 1; + while S (End_Sub) /= ',' + and then S (End_Sub) /= '}' + loop + End_Sub := End_Sub + 1; + end loop; + + -- Second step : create a sub pattern + + Create_Simple + (J + 1, + End_Sub - 1, + Start_Regexp_Sub, + End_Regexp_Sub); + + J := End_Sub; + + -- Third step : create an alternative + + if Create_Start = 0 then + Current_State := Current_State + 1; + Create_Start := Current_State; + Add_Empty_Char (Create_Start, Start_Regexp_Sub); + Current_State := Current_State + 1; + Create_End := Current_State; + Add_Empty_Char (End_Regexp_Sub, Create_End); + + else + Current_State := Current_State + 1; + Add_Empty_Char (Current_State, Create_Start); + Create_Start := Current_State; + Add_Empty_Char (Create_Start, Start_Regexp_Sub); + Add_Empty_Char (End_Regexp_Sub, Create_End); + end if; + end loop; + + if End_State /= 0 then + Add_Empty_Char (End_State, Create_Start); + end if; + + End_State := Create_End; + Last_Start := Create_Start; + end; + + when '*' => + Current_State := Current_State + 1; + + if End_State /= 0 then + Add_Empty_Char (End_State, Current_State); + end if; + + Add_Empty_Char (Current_State, Current_State + 1); + Add_Empty_Char (Current_State, Current_State + 3); + Last_Start := Current_State; + + Current_State := Current_State + 1; + + for K in 0 .. Alphabet_Size loop + Set (Table, Current_State, K, + Value => Current_State + 1); + end loop; + + Current_State := Current_State + 1; + Add_Empty_Char (Current_State, Current_State + 1); + + Current_State := Current_State + 1; + Add_Empty_Char (Current_State, Last_Start); + End_State := Current_State; + + when others => + Current_State := Current_State + 1; + + if S (J) = '?' then + for K in 0 .. Alphabet_Size loop + Set (Table, Current_State, K, + Value => Current_State + 1); + end loop; + + else + if S (J) = '\' then + J := J + 1; + end if; + + -- Create the state for the symbol S (J) + + Set (Table, Current_State, Map (S (J)), + Value => Current_State + 1); + end if; + + Last_Start := Current_State; + Current_State := Current_State + 1; + + if End_State /= 0 then + Add_Empty_Char (End_State, Last_Start); + end if; + + End_State := Current_State; + + end case; + + if Start_State = 0 then + Start_State := Last_Start; + end if; + + J := J + 1; + end loop; + end Create_Simple; + + -- Start of processing for Create_Primary_Table_Glob + + begin + Table.all := (others => (others => 0)); + Create_Simple (S'First, S'Last, Start_State, End_State); + Num_States := Current_State; + end Create_Primary_Table_Glob; + + ---------------------------- + -- Create_Secondary_Table -- + ---------------------------- + + function Create_Secondary_Table + (First_Table : Regexp_Array_Access; + Num_States : State_Index; + Start_State : State_Index; + End_State : State_Index) return Regexp + is + pragma Warnings (Off, Num_States); + + Last_Index : constant State_Index := First_Table'Last (1); + type Meta_State is array (1 .. Last_Index) of Boolean; + + Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) := + (others => (others => 0)); + + Meta_States : array (1 .. Last_Index + 1) of Meta_State := + (others => (others => False)); + + Temp_State_Not_Null : Boolean; + + Is_Final : Boolean_Array (1 .. Last_Index) := (others => False); + + Current_State : State_Index := 1; + Nb_State : State_Index := 1; + + procedure Closure + (State : in out Meta_State; + Item : State_Index); + -- Compute the closure of the state (that is every other state which + -- has a empty-character transition) and add it to the state + + ------------- + -- Closure -- + ------------- + + procedure Closure + (State : in out Meta_State; + Item : State_Index) + is + begin + if State (Item) then + return; + end if; + + State (Item) := True; + + for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop + if First_Table (Item, Column) = 0 then + return; + end if; + + Closure (State, First_Table (Item, Column)); + end loop; + end Closure; + + -- Start of processing for Create_Secondary_Table + + begin + -- Create a new state + + Closure (Meta_States (Current_State), Start_State); + + while Current_State <= Nb_State loop + + -- If this new meta-state includes the primary table end state, + -- then this meta-state will be a final state in the regexp + + if Meta_States (Current_State)(End_State) then + Is_Final (Current_State) := True; + end if; + + -- For every character in the regexp, calculate the possible + -- transitions from Current_State + + for Column in 0 .. Alphabet_Size loop + Meta_States (Nb_State + 1) := (others => False); + Temp_State_Not_Null := False; + + for K in Meta_States (Current_State)'Range loop + if Meta_States (Current_State)(K) + and then First_Table (K, Column) /= 0 + then + Closure + (Meta_States (Nb_State + 1), First_Table (K, Column)); + Temp_State_Not_Null := True; + end if; + end loop; + + -- If at least one transition existed + + if Temp_State_Not_Null then + + -- Check if this new state corresponds to an old one + + for K in 1 .. Nb_State loop + if Meta_States (K) = Meta_States (Nb_State + 1) then + Table (Current_State, Column) := K; + exit; + end if; + end loop; + + -- If not, create a new state + + if Table (Current_State, Column) = 0 then + Nb_State := Nb_State + 1; + Table (Current_State, Column) := Nb_State; + end if; + end if; + end loop; + + Current_State := Current_State + 1; + end loop; + + -- Returns the regexp + + declare + R : Regexp_Access; + + begin + R := new Regexp_Value (Alphabet_Size => Alphabet_Size, + Num_States => Nb_State); + R.Map := Map; + R.Is_Final := Is_Final (1 .. Nb_State); + R.Case_Sensitive := Case_Sensitive; + + for State in 1 .. Nb_State loop + for K in 0 .. Alphabet_Size loop + R.States (State, K) := Table (State, K); + end loop; + end loop; + + return (Ada.Finalization.Controlled with R => R); + end; + end Create_Secondary_Table; + + --------------------- + -- Raise_Exception -- + --------------------- + + procedure Raise_Exception (M : String; Index : Integer) is + begin + raise Error_In_Regexp with M & " at offset" & Index'Img; + end Raise_Exception; + + -- Start of processing for Compile + + begin + -- Special case for the empty string: it always matches, and the + -- following processing would fail on it. + if S = "" then + return (Ada.Finalization.Controlled with + R => new Regexp_Value' + (Alphabet_Size => 0, + Num_States => 1, + Map => (others => 0), + States => (others => (others => 1)), + Is_Final => (others => True), + Case_Sensitive => True)); + end if; + + if not Case_Sensitive then + System.Case_Util.To_Lower (S); + end if; + + -- Check the pattern is well-formed before any treatment + + Check_Well_Formed_Pattern; + + Create_Mapping; + + -- Creates the primary table + + declare + Table : Regexp_Array_Access; + Num_States : State_Index; + Start_State : State_Index; + End_State : State_Index; + R : Regexp; + + begin + Table := new Regexp_Array (1 .. 100, + 0 .. Alphabet_Size + 10); + if not Glob then + Create_Primary_Table (Table, Num_States, Start_State, End_State); + else + Create_Primary_Table_Glob + (Table, Num_States, Start_State, End_State); + end if; + + -- Creates the secondary table + + R := Create_Secondary_Table + (Table, Num_States, Start_State, End_State); + Free (Table); + return R; + end; + end Compile; + + -------------- + -- Finalize -- + -------------- + + procedure Finalize (R : in out Regexp) is + procedure Free is new + Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access); + + begin + Free (R.R); + end Finalize; + + --------- + -- Get -- + --------- + + function Get + (Table : Regexp_Array_Access; + State : State_Index; + Column : Column_Index) return State_Index + is + begin + if State <= Table'Last (1) + and then Column <= Table'Last (2) + then + return Table (State, Column); + else + return 0; + end if; + end Get; + + ----------- + -- Match -- + ----------- + + function Match (S : String; R : Regexp) return Boolean is + Current_State : State_Index := 1; + + begin + if R.R = null then + raise Constraint_Error; + end if; + + for Char in S'Range loop + + if R.R.Case_Sensitive then + Current_State := R.R.States (Current_State, R.R.Map (S (Char))); + else + Current_State := + R.R.States (Current_State, + R.R.Map (System.Case_Util.To_Lower (S (Char)))); + end if; + + if Current_State = 0 then + return False; + end if; + + end loop; + + return R.R.Is_Final (Current_State); + end Match; + + --------- + -- Set -- + --------- + + procedure Set + (Table : in out Regexp_Array_Access; + State : State_Index; + Column : Column_Index; + Value : State_Index) + is + New_Lines : State_Index; + New_Columns : Column_Index; + New_Table : Regexp_Array_Access; + + begin + if State <= Table'Last (1) + and then Column <= Table'Last (2) + then + Table (State, Column) := Value; + else + -- Doubles the size of the table until it is big enough that + -- (State, Column) is a valid index + + New_Lines := Table'Last (1) * (State / Table'Last (1) + 1); + New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1); + New_Table := new Regexp_Array (Table'First (1) .. New_Lines, + Table'First (2) .. New_Columns); + New_Table.all := (others => (others => 0)); + + for J in Table'Range (1) loop + for K in Table'Range (2) loop + New_Table (J, K) := Table (J, K); + end loop; + end loop; + + Free (Table); + Table := New_Table; + Table (State, Column) := Value; + end if; + end Set; + +end System.Regexp; -- cgit v1.2.3