From: Laurent Pautet Date: Wed, 30 Jul 2008 17:38:55 +0000 (+0200) Subject: g-pehage.adb: Remove a limitation on the length of the words handled by the minimal... X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=db18b0f50ba5b0facae835217a92961b2c765aaa;p=platform%2Fupstream%2Fgcc.git g-pehage.adb: Remove a limitation on the length of the words handled by the minimal perfect... 2008-07-30 Laurent Pautet * g-pehage.adb: Remove a limitation on the length of the words handled by the minimal perfect hash function generator. * g-pehage.ads: Detail the use of subprograms Insert, Initialize, Compute and Finalize. Fix some typos. From-SVN: r138331 --- diff --git a/gcc/ada/g-pehage.adb b/gcc/ada/g-pehage.adb index 8d47333..c779fac 100644 --- a/gcc/ada/g-pehage.adb +++ b/gcc/ada/g-pehage.adb @@ -6,7 +6,7 @@ -- -- -- B o d y -- -- -- --- Copyright (C) 2002-2007, AdaCore -- +-- Copyright (C) 2002-2008, AdaCore -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -51,8 +51,8 @@ package body GNAT.Perfect_Hash_Generators is -- where f1 and f2 are functions that map strings into integers, and g is a -- function that maps integers into [0, m-1]. h can be order preserving. - -- For instance, let W = {w_0, ..., w_i, ..., - -- w_m-1}, h can be defined such that h (w_i) = i. + -- For instance, let W = {w_0, ..., w_i, ..., w_m-1}, h can be defined + -- such that h (w_i) = i. -- This algorithm defines two possible constructions of f1 and f2. Method -- b) stores the hash function in less memory space at the expense of @@ -82,10 +82,10 @@ package body GNAT.Perfect_Hash_Generators is -- probability of generating an acyclic graph, n >= 2m. If it is not -- acyclic, Tk have to be regenerated. - -- In the assignment step, the algorithm builds function g. As is acyclic, - -- there is a vertex v1 with only one neighbor v2. Let w_i be the word such - -- that v1 = f1 (w_i) and v2 = f2 (w_i). Let g (v1) = 0 by construction and - -- g (v2) = (i - g (v1)) mod n (or to be general, (h (i) - g (v1) mod n). + -- In the assignment step, the algorithm builds function g. As G is + -- acyclic, there is a vertex v1 with only one neighbor v2. Let w_i be + -- the word such that v1 = f1 (w_i) and v2 = f2 (w_i). Let g (v1) = 0 by + -- construction and g (v2) = (i - g (v1)) mod n (or h (i) - g (v1) mod n). -- If word w_j is such that v2 = f1 (w_j) and v3 = f2 (w_j), g (v3) = (j - -- g (v2)) mod (or to be general, (h (j) - g (v2)) mod n). If w_i has no -- neighbor, then another vertex is selected. The algorithm traverses G to @@ -102,11 +102,12 @@ package body GNAT.Perfect_Hash_Generators is No_Edge : constant Edge_Id := -1; No_Table : constant Table_Id := -1; - Max_Word_Length : constant := 32; - subtype Word_Type is String (1 .. Max_Word_Length); - Null_Word : constant Word_Type := (others => ASCII.NUL); - -- Store keyword in a word. Note that the length of word is limited to 32 - -- characters. + type Word_Type is new String_Access; + procedure Free_Word (W : in out Word_Type); + function New_Word (S : String) return Word_Type; + + procedure Resize_Word (W : in out Word_Type; Len : Natural); + -- Resize string W to have a length Len type Key_Type is record Edge : Edge_Id; @@ -130,8 +131,12 @@ package body GNAT.Perfect_Hash_Generators is package WT is new GNAT.Table (Word_Type, Word_Id, 0, 32, 32); package IT is new GNAT.Table (Integer, Integer, 0, 32, 32); - -- The two main tables. IT is used to store several tables of components - -- containing only integers. + -- The two main tables. WT is used to store the words in their initial + -- version and in their reduced version (that is words reduced to + -- their significant characters). As an instance of GNAT.Table, WT does + -- not initialize string pointers to null. This initialization has to be + -- done manually when the table is allocated. IT is used to store several + -- tables of components containing only integers. function Image (Int : Integer; W : Natural := 0) return String; function Image (Str : String; W : Natural := 0) return String; @@ -298,9 +303,6 @@ package body GNAT.Perfect_Hash_Generators is function Allocate (N : Natural; S : Natural := 1) return Table_Id; -- Allocate N * S ints from IT table - procedure Free_Tmp_Tables; - -- Deallocate the tables used by the algorithm (but not the keys table) - ---------- -- Keys -- ---------- @@ -408,7 +410,7 @@ package body GNAT.Perfect_Hash_Generators is -- Optimization mode (memory vs CPU) Max_Key_Len : Natural := 0; - Min_Key_Len : Natural := Max_Word_Length; + Min_Key_Len : Natural := 0; -- Maximum and minimum of all the word length S : Natural; @@ -530,26 +532,27 @@ package body GNAT.Perfect_Hash_Generators is procedure Apply_Position_Selection is begin - WT.Set_Last (2 * NK); for J in 0 .. NK - 1 loop declare - I_Word : constant Word_Type := WT.Table (Initial (J)); - R_Word : Word_Type := Null_Word; - Index : Natural := I_Word'First - 1; + IW : constant String := WT.Table (Initial (J)).all; + RW : String (1 .. IW'Length) := (others => ASCII.NUL); + N : Natural := IW'First - 1; begin -- Select the characters of Word included in the position -- selection. for C in 0 .. Char_Pos_Set_Len - 1 loop - exit when I_Word (Get_Char_Pos (C)) = ASCII.NUL; - Index := Index + 1; - R_Word (Index) := I_Word (Get_Char_Pos (C)); + exit when IW (Get_Char_Pos (C)) = ASCII.NUL; + N := N + 1; + RW (N) := IW (Get_Char_Pos (C)); end loop; - -- Build the new table with the reduced word + -- Build the new table with the reduced word. Be careful + -- to deallocate the old version to avoid memory leaks. - WT.Table (Reduced (J)) := R_Word; + Free_Word (WT.Table (Reduced (J))); + WT.Table (Reduced (J)) := New_Word (RW); Set_Key (J, (Edge => No_Edge)); end; end loop; @@ -588,7 +591,7 @@ package body GNAT.Perfect_Hash_Generators is -- Start of processing for Assign_Values_To_Vertices begin - -- Value -1 denotes an uninitialized value as it is supposed to + -- Value -1 denotes an unitialized value as it is supposed to -- be in the range 0 .. NK. if G = No_Table then @@ -628,9 +631,9 @@ package body GNAT.Perfect_Hash_Generators is Success : Boolean := False; begin - NV := Natural (K2V * Float (NK)); - - Keys := Allocate (NK); + if NK = 0 then + raise Program_Error with "keywords set cannot be empty"; + end if; if Verbose then Put_Initial_Keys (Output, "Initial Key Table"); @@ -861,23 +864,16 @@ package body GNAT.Perfect_Hash_Generators is procedure Finalize is begin - Free_Tmp_Tables; + -- Deallocate all the WT components (both initial and reduced + -- ones) to avoid memory leaks. + for W in 0 .. WT.Last loop + Free_Word (WT.Table (W)); + end loop; WT.Release; IT.Release; - NK := 0; - Max_Key_Len := 0; - Min_Key_Len := Max_Word_Length; - end Finalize; - - --------------------- - -- Free_Tmp_Tables -- - --------------------- - - procedure Free_Tmp_Tables is - begin - IT.Init; + -- Reset all variables for next usage Keys := No_Table; @@ -901,7 +897,22 @@ package body GNAT.Perfect_Hash_Generators is Vertices := No_Table; NV := 0; - end Free_Tmp_Tables; + + NK := 0; + Max_Key_Len := 0; + Min_Key_Len := 0; + end Finalize; + + --------------- + -- Free_Word -- + --------------- + + procedure Free_Word (W : in out Word_Type) is + begin + if W /= null then + Free (W); + end if; + end Free_Word; ---------------------------- -- Generate_Mapping_Table -- @@ -1130,20 +1141,76 @@ package body GNAT.Perfect_Hash_Generators is Tries : Positive := Default_Tries) is begin - -- Free previous tables (the settings may have changed between two runs) + -- Deallocated the part of the table concerning the reduced + -- words. Initial words are already present in the table. We + -- may have reduced words already there because a previous + -- computation failed. We are currently retrying and the + -- reduced words have to be deallocated. + + for W in NK .. WT.Last loop + Free_Word (WT.Table (W)); + end loop; + IT.Init; - Free_Tmp_Tables; + -- Initialize of computation variables - if K_To_V <= 2.0 then - Put (Output, "K to V ratio cannot be lower than 2.0"); - New_Line (Output); - raise Program_Error; - end if; + Keys := No_Table; + + Char_Pos_Set := No_Table; + Char_Pos_Set_Len := 0; + + Used_Char_Set := No_Table; + Used_Char_Set_Len := 0; + + T1 := No_Table; + T2 := No_Table; + + T1_Len := 0; + T2_Len := 0; + + G := No_Table; + G_Len := 0; + + Edges := No_Table; + Edges_Len := 0; + + Vertices := No_Table; + NV := 0; S := Seed; K2V := K_To_V; Opt := Optim; NT := Tries; + + if K2V <= 2.0 then + raise Program_Error with "K to V ratio cannot be lower than 2.0"; + end if; + + -- Do not accept a value of K2V too close to 2.0 such that once + -- rounded up, NV = 2 * NK because the algorithm would not converge. + + NV := Natural (Float (NK) * K2V); + if NV <= 2 * NK then + NV := 2 * NK + 1; + end if; + + Keys := Allocate (NK); + + -- Resize initial words to have all of them at the same size + -- (so the size of the largest one). + + for K in 0 .. NK - 1 loop + Resize_Word (WT.Table (Initial (K)), Max_Key_Len); + end loop; + + -- Allocated the table to store the reduced words. As WT is a + -- GNAT.Table (using C memory management), pointers have to be + -- explicitly initialized to null. + + WT.Set_Last (Reduced (NK - 1)); + for W in 0 .. NK - 1 loop + WT.Table (Reduced (W)) := null; + end loop; end Initialize; ------------ @@ -1151,28 +1218,18 @@ package body GNAT.Perfect_Hash_Generators is ------------ procedure Insert (Value : String) is - Word : Word_Type := Null_Word; Len : constant Natural := Value'Length; begin - Word (1 .. Len) := Value (Value'First .. Value'First + Len - 1); WT.Set_Last (NK); - WT.Table (NK) := Word; + WT.Table (NK) := New_Word (Value); NK := NK + 1; - NV := Natural (Float (NK) * K2V); - - -- Do not accept a value of K2V too close to 2.0 such that once rounded - -- up, NV = 2 * NK because the algorithm would not converge. - - if NV <= 2 * NK then - NV := 2 * NK + 1; - end if; if Max_Key_Len < Len then Max_Key_Len := Len; end if; - if Len < Min_Key_Len then + if Min_Key_Len = 0 or else Len < Min_Key_Len then Min_Key_Len := Len; end if; end Insert; @@ -1188,6 +1245,15 @@ package body GNAT.Perfect_Hash_Generators is end if; end New_Line; + -------------- + -- New_Word -- + -------------- + + function New_Word (S : String) return Word_Type is + begin + return new String'(S); + end New_Word; + ------------------------------ -- Parse_Position_Selection -- ------------------------------ @@ -1761,7 +1827,7 @@ package body GNAT.Perfect_Hash_Generators is K := Get_Key (J); Put (File, Image (J, M), F1, L1, J, 1, 3, 1); Put (File, Image (K.Edge, M), F1, L1, J, 1, 3, 2); - Put (File, WT.Table (Initial (J)), F1, L1, J, 1, 3, 3); + Put (File, WT.Table (Initial (J)).all, F1, L1, J, 1, 3, 3); end loop; end Put_Initial_Keys; @@ -1842,7 +1908,7 @@ package body GNAT.Perfect_Hash_Generators is K := Get_Key (J); Put (File, Image (J, M), F1, L1, J, 1, 3, 1); Put (File, Image (K.Edge, M), F1, L1, J, 1, 3, 2); - Put (File, WT.Table (Reduced (J)), F1, L1, J, 1, 3, 3); + Put (File, WT.Table (Reduced (J)).all, F1, L1, J, 1, 3, 3); end loop; end Put_Reduced_Keys; @@ -1920,6 +1986,22 @@ package body GNAT.Perfect_Hash_Generators is return K + NK + 1; end Reduced; + ----------------- + -- Resize_Word -- + ----------------- + + procedure Resize_Word (W : in out Word_Type; Len : Natural) is + S1 : constant String := W.all; + S2 : String (1 .. Len) := (others => ASCII.NUL); + L : constant Natural := S1'Length; + begin + if L /= Len then + Free_Word (W); + S2 (1 .. L) := S1; + W := New_Word (S2); + end if; + end Resize_Word; + -------------------------- -- Select_Char_Position -- -------------------------- @@ -1985,11 +2067,11 @@ package body GNAT.Perfect_Hash_Generators is begin if L = 0 then - Left := Reduced (0) - 1; + Left := NK; Right := Offset + R; elsif R = 0 then Left := Offset + L; - Right := Reduced (0) - 1; + Right := NK; else Left := Offset + L; Right := Offset + R; @@ -2007,17 +2089,18 @@ package body GNAT.Perfect_Hash_Generators is begin if From = 0 then - Source := Reduced (0) - 1; + Source := NK; Target := Offset + To; elsif To = 0 then Source := Offset + From; - Target := Reduced (0) - 1; + Target := NK; else Source := Offset + From; Target := Offset + To; end if; WT.Table (Target) := WT.Table (Source); + WT.Table (Source) := null; end Move; package Sorting is new GNAT.Heap_Sort_G (Move, Lt); @@ -2120,9 +2203,8 @@ package body GNAT.Perfect_Hash_Generators is begin -- Initialize the reduced words set - WT.Set_Last (2 * NK); for K in 0 .. NK - 1 loop - WT.Table (Reduced (K)) := WT.Table (Initial (K)); + WT.Table (Reduced (K)) := New_Word (WT.Table (Initial (K)).all); end loop; declare @@ -2220,7 +2302,7 @@ package body GNAT.Perfect_Hash_Generators is Same_Keys_Sets_Table (J).First .. Same_Keys_Sets_Table (J).Last loop - Put (Output, WT.Table (Reduced (K))); + Put (Output, WT.Table (Reduced (K)).all); New_Line (Output); end loop; Put (Output, "--"); diff --git a/gcc/ada/g-pehage.ads b/gcc/ada/g-pehage.ads index b2aea49..277a2a4 100644 --- a/gcc/ada/g-pehage.ads +++ b/gcc/ada/g-pehage.ads @@ -6,7 +6,7 @@ -- -- -- S p e c -- -- -- --- Copyright (C) 2002-2005, AdaCore -- +-- Copyright (C) 2002-2008, AdaCore -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -60,7 +60,7 @@ -- The hash table size corresponds to the exact size of W and *no larger*. -- This represents the "minimal" property. --- The functions generated by this package require the key set to be known in +-- The functions generated by this package require the words to be known in -- advance (they are "static" hash functions). The hash functions are also -- order preserving. If w2 is inserted after w1 in the generator, then (w1) -- < f (w2). These hashing functions are convenient for use with realtime @@ -99,27 +99,33 @@ package GNAT.Perfect_Hash_Generators is K_To_V : Float := Default_K_To_V; Optim : Optimization := CPU_Time; Tries : Positive := Default_Tries); - -- Initialize the generator and its internal structures. Set the ratio of - -- vertices over keys in the random graphs. This value has to be greater - -- than 2.0 in order for the algorithm to succeed. The key set is not - -- modified (in particular when it is already set). For instance, it is - -- possible to run several times the generator with different settings on - -- the same key set. + -- Initialize the generator and its internal structures. Set the + -- ratio of vertices over keys in the random graphs. This value + -- has to be greater than 2.0 in order for the algorithm to + -- succeed. The word set is not modified (in particular when it is + -- already set). For instance, it is possible to run several times + -- the generator with different settings on the same words. + + -- A classical way of doing is to Insert all the words and then to + -- invoke Initialize and Compute. If Compute fails to find a + -- perfect hash function, invoke Initialize another time with + -- other configuration parameters (probably with a greater K_To_V + -- ratio). Once successful, invoke Produce and Finalize. procedure Finalize; - -- Deallocate the internal structures and the key table + -- Deallocate the internal structures and the words table procedure Insert (Value : String); - -- Insert a new key in the table + -- Insert a new word in the table Too_Many_Tries : exception; -- Raised after Tries unsuccessful runs procedure Compute (Position : String := Default_Position); -- Compute the hash function. Position allows to define selection of - -- character positions used in the keywords hash function. Positions can be + -- character positions used in the word hash function. Positions can be -- separated by commas and range like x-y may be used. Character '$' - -- represents the final character of a key. With an empty position, the + -- represents the final character of a word. With an empty position, the -- generator automatically produces positions to reduce the memory usage. -- Raise Too_Many_Tries in case that the algorithm does not succeed in less -- than Tries attempts (see Initialize). @@ -144,7 +150,7 @@ package GNAT.Perfect_Hash_Generators is -- F1 and F2 are two functions based on two function tables T1 and T2. -- Their definition depends on the chosen optimization mode. - -- Only some character positions are used in the keys because they are + -- Only some character positions are used in the words because they are -- significant. They are listed in a character position table (P in the -- pseudo-code below). For instance, in {"jan", "feb", "mar", "apr", "jun", -- "jul", "aug", "sep", "oct", "nov", "dec"}, only positions 2 and 3 are @@ -152,7 +158,7 @@ package GNAT.Perfect_Hash_Generators is -- {2, 3} -- When Optimization is CPU_Time, the first dimension of T1 and T2 - -- corresponds to the character position in the key and the second to the + -- corresponds to the character position in the word and the second to the -- character set. As all the character set is not used, we define a used -- character table which associates a distinct index to each used character -- (unused characters are mapped to zero). In this case, the second @@ -177,7 +183,7 @@ package GNAT.Perfect_Hash_Generators is -- end Hash; -- When Optimization is Memory_Space, the first dimension of T1 and T2 - -- corresponds to the character position in the key and the second + -- corresponds to the character position in the word and the second -- dimension is ignored. T1 and T2 are no longer matrices but vectors. -- Therefore, the used character table is not available. The hash function -- has the following form: