Update.
authorUlrich Drepper <drepper@redhat.com>
Mon, 18 Jan 1999 23:15:16 +0000 (23:15 +0000)
committerUlrich Drepper <drepper@redhat.com>
Mon, 18 Jan 1999 23:15:16 +0000 (23:15 +0000)
1999-01-18  Ulrich Drepper  <drepper@cygnus.com>

* iconv/gconv_conf.c (add_module): Complete rewrite.  Use cleverer
data structures and avoid creating intermediate representations
first.  Rewrite also all helper functions.
* iconv/gconv_db.c (find_derivation): Use new data structure for
module database.
* iconv/Versions: Remove __gconv_nmodules.
* iconv/iconv_prog.c: Rewrite generation of charset name list to
use new data structure.
* iconv/gconv_int.h (struct gconv_module): Add new elements for
database data structure.
(__gconv_modules_db): Update type.
(__gconv_transform_dummy): Removed.
* iconv/gconv_builtin.h: Remove dummy transformation.
* iconv/gconv_simple.c: Remove __gconv_transform_dummy.

* sysdeps/unix/sysv/linux/sparc/sparc32/syscalls.list: Remove
__syscall_vfork, add vfork.
* sysdeps/unix/sysv/linux/sparc/sparc64/syscalls.list: Likewise.

* Rules: Add dummp.c and dummy.o to common-generated.

ChangeLog
iconv/Versions
iconv/gconv_builtin.h
iconv/gconv_conf.c
iconv/gconv_db.c
iconv/gconv_int.h
iconv/gconv_simple.c
iconv/iconv_prog.c
sysdeps/unix/sysv/linux/sparc/sparc32/syscalls.list
sysdeps/unix/sysv/linux/sparc/sparc64/syscalls.list

index ebcd2dc..ddc5f09 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+1999-01-18  Ulrich Drepper  <drepper@cygnus.com>
+
+       * iconv/gconv_conf.c (add_module): Complete rewrite.  Use cleverer
+       data structures and avoid creating intermediate representations
+       first.  Rewrite also all helper functions.
+       * iconv/gconv_db.c (find_derivation): Use new data structure for
+       module database.
+       * iconv/Versions: Remove __gconv_nmodules.
+       * iconv/iconv_prog.c: Rewrite generation of charset name list to
+       use new data structure.
+       * iconv/gconv_int.h (struct gconv_module): Add new elements for
+       database data structure.
+       (__gconv_modules_db): Update type.
+       (__gconv_transform_dummy): Removed.
+       * iconv/gconv_builtin.h: Remove dummy transformation.
+       * iconv/gconv_simple.c: Remove __gconv_transform_dummy.
+
+       * sysdeps/unix/sysv/linux/sparc/sparc32/syscalls.list: Remove
+       __syscall_vfork, add vfork.
+       * sysdeps/unix/sysv/linux/sparc/sparc64/syscalls.list: Likewise.
+
 1999-01-18  Andreas Jaeger  <aj@arthur.rhein-neckar.de>
 
        * posix/test-vfork.c: Include <sys/wait.h> for wait declaration.
@@ -9,7 +30,7 @@
 
 1999-01-18  Ulrich Drepper  <drepper@cygnus.com>
 
-       * Rules: Add dummp.y and dummy.o to common-generated.
+       * Rules: Add dummp.c and dummy.o to common-generated.
        Patch by Andreas Schwab.
 
 1999-01-18 10:07 -0500  Zack Weinberg  <zack@rabi.phys.columbia.edu>
index 25d1642..577a548 100644 (file)
@@ -1,7 +1,7 @@
 libc {
   GLIBC_2.1 {
     # global variables
-    __gconv_alias_db; __gconv_nmodules; __gconv_modules_db;
+    __gconv_alias_db; __gconv_modules_db;
 
     # i*
     iconv_open; iconv; iconv_close;
index 554989b..e12f1e4 100644 (file)
@@ -79,8 +79,3 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "UNICODELITTLE//",
                        1, "=INTERNAL->ucs2little",
                        __gconv_transform_internal_ucs2little, NULL, NULL,
                        4, 4, 2, 2)
-
-
-BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy",
-                       __gconv_transform_dummy, NULL, NULL,
-                       1, 1, 1, 1)
index d3f516e..24ec14a 100644 (file)
@@ -1,5 +1,5 @@
 /* Handle configuration data.
-   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -83,99 +83,84 @@ builtin_aliases[] =
 #endif
 
 
-/* Function for searching module.  */
+/* Test whether there is already a matching module known.  */
 static int
-module_compare (const void *p1, const void *p2)
+internal_function
+detect_conflict (const char *alias, size_t alias_len)
 {
-  const struct gconv_module *s1 = (const struct gconv_module *) p1;
-  const struct gconv_module *s2 = (const struct gconv_module *) p2;
-  int result;
+  struct gconv_module *node = __gconv_modules_db;
 
-  if (s1->from_pattern == NULL)
+  while (node != NULL)
     {
-      if (s2->from_pattern == NULL)
-       result = strcmp (s1->from_constpfx, s2->from_constpfx);
-      else
-       result = -1;
-    }
-  else if (s2->from_pattern == NULL)
-    result = 1;
-  else
-    result = strcmp (s1->from_pattern, s2->from_pattern);
-
-  if (result == 0)
-    result = strcmp (s1->to_string, s2->to_string);
-
-  return result;
-}
-
-
-/* This function is used to test for a conflict which could be introduced
-   if adding a new alias.
+      int cmpres = strncmp (alias, node->from_constpfx,
+                           MIN (alias_len, node->from_constpfx_len));
 
-   This function is a *very* ugly hack.  The action-function is not
-   supposed to alter the parameter.  But we have to do this.   We will if
-   necessary compile the regular expression  so that we can see whether it
-   matches the alias name.  This is safe in this environment and for the
-   sake of performance we do it this way.  The alternative would be to
-   compile all regular expressions right from the start or to forget about
-   the compilation though we might need it later.
-
-   The second ugliness is that we have no possibility to pass parameters
-   to the function.  Therefore we use a global variable.  This is no problem
-   since we are for sure alone even in multi-threaded applications.  */
-
-/* This is alias we want to check.  */
-static const char *alias_to_test;
-
-/* This variable is set to a nonzero value once we have found a matching
-   entry.  */
-static int abort_conflict_search;
-
-static void
-detect_conflict (const void *p, VISIT value, int level)
-{
-  struct gconv_module *s = *(struct gconv_module **) p;
-
-  if ((value != endorder && value != leaf) || s->from_constpfx == NULL
-      || abort_conflict_search)
-    return;
-
-  /* Before we test the whole expression (if this is a regular expression)
-     make sure the constant prefix matches.  In case this is no regular
-     expression this is the whole string.  */
-  if (strcmp (alias_to_test, s->from_constpfx) == 0)
-    {
-      if (s->from_pattern == NULL)
-       /* This is a simple string and therefore we have a conflict.  */
-       abort_conflict_search = 1;
-      else
+      if (cmpres == 0)
        {
-         /* Make sure the regular expression is compiled (if possible).  */
-         if (s->from_regex == NULL)
+         struct gconv_module *runp;
+
+         if (alias_len < node->from_constpfx_len)
+           /* Cannot possibly match.  */
+           return 0;
+
+         /* This means the prefix and the alias are identical.  If
+            there is now a simple extry or a regular expression
+            matching this name we have found a conflict.  If there is
+            no conflict with the elements in the `same' list there
+            cannot be a conflict.  */
+         runp = node;
+         do
            {
-             /* Beware, this is what I warned you about in the comment
-                above.  We are modifying the object.  */
-             if (__regcomp (&s->from_regex_mem, s->from_pattern,
-                            REG_EXTENDED | REG_ICASE) != 0)
-               /* Something is wrong.  Remember this.  */
-               s->from_regex = (regex_t *) -1L;
+             if (runp->from_pattern == NULL)
+               {
+                 /* This is a simple entry and therefore we have a
+                    conflict if the strings are really the same.  */
+                 if (alias_len == node->from_constpfx_len)
+                   return 1;
+               }
              else
-               s->from_regex = &s->from_regex_mem;
+               {
+                 /* Compile the regular expression if necessary.  */
+                 if (runp->from_regex == NULL)
+                   {
+                     if (__regcomp (&runp->from_regex_mem,
+                                    runp->from_pattern,
+                                    REG_EXTENDED | REG_ICASE) != 0)
+                       /* Something is wrong.  Remember this.  */
+                       runp->from_regex = (regex_t *) -1L;
+                     else
+                       runp->from_regex = &runp->from_regex_mem;
+                   }
+
+                 if (runp->from_regex != (regex_t *) -1L)
+                   {
+                     regmatch_t match[1];
+
+                     /* Try to match the regular expression.  */
+                     if (__regexec (runp->from_regex, alias, 1, match, 0) == 0
+                         && match[0].rm_so == 0
+                         && alias[match[0].rm_eo] == '\0')
+                       /* They match, therefore it is a conflict.  */
+                       return 1;
+                   }
+               }
+
+             runp = runp->same;
            }
+         while (runp != NULL);
 
-         if (s->from_regex != (regex_t *) -1L)
-           {
-             regmatch_t match[1];
+         if (alias_len == node->from_constpfx_len)
+             return 0;
 
-             if (__regexec (s->from_regex, alias_to_test, 1, match, 0) == 0
-                 && match[0].rm_so == 0
-                 && alias_to_test[match[0].rm_eo] == '\0')
-               /* The whole string matched.  This is also a conflict.  */
-               abort_conflict_search = 1;
-           }
+         node = node->matching;
        }
+      else if (cmpres < 0)
+       node = node->left;
+      else
+       node = node->right;
     }
+
+  return node != NULL;
 }
 
 
@@ -207,13 +192,8 @@ add_alias (char *rp, void *modules)
     return;
   *wp++ = '\0';
 
-  /* Test whether this alias conflicts with any available module.  See
-     the comment before the function `detect_conflict' for a description
-     of this ugly hack.  */
-  alias_to_test = from;
-  abort_conflict_search = 0;
-  __twalk (modules, detect_conflict);
-  if (abort_conflict_search)
+  /* Test whether this alias conflicts with any available module.  */
+  if (detect_conflict (from, to - from - 1))
     /* It does conflict, don't add the alias.  */
     return;
 
@@ -234,8 +214,74 @@ add_alias (char *rp, void *modules)
 }
 
 
+/* Insert a data structure for a new module in the search tree.  */
+static inline void
+internal_function
+insert_module (struct gconv_module *newp)
+{
+  struct gconv_module **rootp = &__gconv_modules_db;
+
+  while (*rootp != NULL)
+    {
+      struct gconv_module *root = *rootp;
+      size_t minlen = MIN (newp->from_constpfx_len, root->from_constpfx_len);
+      int cmpres;
+
+      cmpres = strncmp (newp->from_constpfx, root->from_constpfx, minlen);
+      if (cmpres == 0)
+       {
+         /* This can mean two things: the prefix is entirely the same or
+            it matches only for the minimum length of both strings.  */
+         if (newp->from_constpfx_len == root->from_constpfx_len)
+           {
+             /* Both prefixes are identical.  Insert the string at the
+                end of the `same' list if it is not already there.  */
+             const char *from_pattern = (newp->from_pattern
+                                         ?: newp->from_constpfx);
+
+             while (strcmp (from_pattern,
+                            root->from_pattern ?: root->from_constpfx) != 0
+                    || strcmp (newp->to_string, root->to_string) != 0)
+               {
+                 rootp = &root->same;
+                 root = *rootp;
+                 if (root == NULL)
+                   break;
+               }
+
+             if (root != NULL)
+               /* This is a no new conversion.  */
+               return;
+
+             break;
+           }
+
+         /* The new element either has a prefix which is itself a
+            prefix for the prefix of the current node or vice verse.
+            In the first case we insert the node right here.  Otherwise
+            we have to descent further.  */
+         if (newp->from_constpfx_len < root->from_constpfx_len)
+           {
+             newp->matching = root;
+             break;
+           }
+
+         rootp = &root->matching;
+       }
+      else if (cmpres < 0)
+       rootp = &root->left;
+      else
+       rootp = &root->right;
+    }
+
+  /* Plug in the new node here.  */
+  *rootp = newp;
+}
+
+
 /* Add new module.  */
 static inline void
+internal_function
 add_module (char *rp, const char *directory, size_t dir_len, void **modules,
            size_t *nmodules, int modcounter)
 {
@@ -259,7 +305,7 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules,
   while (*rp != '\0' && !isspace (*rp))
     {
       if (!isalnum (*rp) && *rp != '-' && *rp != '/' && *rp != '.'
-         && *rp != '_')
+         && *rp != '_' && *rp != '(' && *rp != ')')
        from_is_regex = 1;
       ++rp;
     }
@@ -293,7 +339,7 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules,
 
       *wp++ = '\0';
       cost_hi = strtol (rp, &endp, 10);
-      if (rp == endp)
+      if (rp == endp || cost_hi < 1)
        /* No useful information.  */
        cost_hi = 1;
     }
@@ -328,7 +374,8 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules,
   else
     const_len = to - from - 1;
 
-  new_module = (struct gconv_module *) malloc (sizeof (struct gconv_module)
+  new_module = (struct gconv_module *) calloc (1,
+                                              sizeof (struct gconv_module)
                                               + (wp - from)
                                               + dir_len + need_ext);
   if (new_module != NULL)
@@ -340,13 +387,9 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules,
                                          from, to - from);
       if (from_is_regex)
        new_module->from_pattern = new_module->from_constpfx;
-      else
-       new_module->from_pattern = NULL;
 
       new_module->from_constpfx_len = const_len;
 
-      new_module->from_regex = NULL;
-
       new_module->to_string = memcpy ((char *) new_module->from_constpfx
                                      + (to - from), to, module - to);
 
@@ -388,31 +431,12 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules,
            }
        }
 
-      if (__tfind (new_module, modules, module_compare) == NULL)
-       {
-         if (__tsearch (new_module, modules, module_compare) == NULL)
-           /* Something went wrong while inserting the new module.  */
-           free (new_module);
-         else
-           ++*nmodules;
-       }
+      /* Now insert the new module data structure in our search tree.  */
+      insert_module (new_module);
     }
 }
 
 
-static void
-insert_module (const void *nodep, VISIT value, int level)
-{
-  if (value == preorder || value == leaf)
-    __gconv_modules_db[__gconv_nmodules++] = *(struct gconv_module **) nodep;
-}
-
-static void
-nothing (void *unused __attribute__ ((unused)))
-{
-}
-
-
 /* Read the next configuration file.  */
 static void
 internal_function
@@ -495,15 +519,14 @@ __gconv_read_conf (void)
     {
       /* Append the default path to the user-defined path.  */
       size_t user_len = strlen (user_path);
-      char *tmp;
 
       gconv_path = alloca (user_len + 1 + sizeof (default_gconv_path));
-      tmp = __mempcpy (gconv_path, user_path, user_len);
-      *tmp++ = ':';
-      __mempcpy (tmp, default_gconv_path, sizeof (default_gconv_path));
+      __mempcpy (__mempcpy (__mempcpy (gconv_path, user_path, user_len),
+                           ":", 1),
+                default_gconv_path, sizeof (default_gconv_path));
     }
 
-  elem = strtok_r (gconv_path, ":", &gconv_path);
+  elem = __strtok_r (gconv_path, ":", &gconv_path);
   while (elem != NULL)
     {
 #ifndef MAXPATHLEN
@@ -515,43 +538,38 @@ __gconv_read_conf (void)
       if (__realpath (elem, real_elem) != NULL)
        {
          size_t elem_len = strlen (real_elem);
-         char *filename, *tmp;
+         char *filename;
 
          filename = alloca (elem_len + 1 + sizeof (gconv_conf_filename));
-         tmp = __mempcpy (filename, real_elem, elem_len);
-         *tmp++ = '/';
-         __mempcpy (tmp, gconv_conf_filename, sizeof (gconv_conf_filename));
+         __mempcpy (__mempcpy (__mempcpy (filename, real_elem, elem_len),
+                               "/", 1),
+                    gconv_conf_filename, sizeof (gconv_conf_filename));
 
          /* Read the next configuration file.  */
          read_conf_file (filename, real_elem, elem_len, &modules, &nmodules);
        }
 
       /* Get next element in the path.  */
-      elem = strtok_r (NULL, ":", &gconv_path);
+      elem = __strtok_r (NULL, ":", &gconv_path);
     }
 
-  /* If the configuration files do not contain any valid module specification
-     remember this by setting the pointer to the module array to NULL.  */
-  nmodules += sizeof (builtin_modules) / sizeof (builtin_modules[0]);
-  if (nmodules == 0)
-    __gconv_modules_db = NULL;
-  else
+  /* Add the internal modules.  */
+  for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]);
+       ++cnt)
     {
-      __gconv_modules_db =
-       (struct gconv_module **) malloc (nmodules
-                                        * sizeof (struct gconv_module));
-      if (__gconv_modules_db != NULL)
+      if (builtin_modules[cnt].from_pattern == NULL)
        {
-         size_t cnt;
+         struct gconv_alias fake_alias;
 
-         /* Insert all module entries into the array.  */
-         __twalk (modules, insert_module);
+         fake_alias.fromname = builtin_modules[cnt].from_constpfx;
 
-         /* Finally insert the builtin transformations.  */
-         for (cnt = 0; cnt < (sizeof (builtin_modules)
-                              / sizeof (struct gconv_module)); ++cnt)
-           __gconv_modules_db[__gconv_nmodules++] = &builtin_modules[cnt];
+         if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
+             != NULL)
+           /* It'll conflict so don't add it.  */
+           continue;
        }
+
+      insert_module (&builtin_modules[cnt]);
     }
 
   /* Add aliases for builtin conversions.  */
@@ -561,10 +579,6 @@ __gconv_read_conf (void)
       char *copy = strdupa (builtin_aliases[--cnt]);
       add_alias (copy, modules);
     }
-  
-  if (nmodules != 0)
-    /* Now remove the tree data structure.  */
-    __tdestroy (modules, nothing);
 
   /* Restore the error number.  */
   __set_errno (save_errno);
index e6253b8..5269d79 100644 (file)
@@ -1,5 +1,5 @@
 /* Provide access to the collection of available transformation modules.
-   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
+#include <limits.h>
 #include <search.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/param.h>
 #include <bits/libc-lock.h>
 
 #include <ldsodefs.h>
@@ -32,8 +34,7 @@
 void *__gconv_alias_db;
 
 /* Array with available modules.  */
-size_t __gconv_nmodules;
-struct gconv_module **__gconv_modules_db;
+struct gconv_module *__gconv_modules_db;
 
 /* We modify global data.   */
 __libc_lock_define_initialized (static, lock)
@@ -55,14 +56,20 @@ __gconv_alias_compare (const void *p1, const void *p2)
 struct derivation_step
 {
   const char *result_set;
+  size_t result_set_len;
+  int cost_lo;
+  int cost_hi;
   struct gconv_module *code;
   struct derivation_step *last;
   struct derivation_step *next;
 };
 
-#define NEW_STEP(result, module, last_mod) \
+#define NEW_STEP(result, hi, lo, module, last_mod) \
   ({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
      newp->result_set = result;                                                      \
+     newp->result_set_len = strlen (result);                                 \
+     newp->cost_hi = hi;                                                     \
+     newp->cost_lo = lo;                                                     \
      newp->code = module;                                                    \
      newp->last = last_mod;                                                  \
      newp->next = NULL;                                                              \
@@ -224,7 +231,7 @@ gen_steps (struct derivation_step *best, const char *toset,
             {
               status = _CALL_DL_FCT (result[step_cnt].init_fct,
                                      (&result[step_cnt]));
-              
+
               if (status != GCONV_OK)
                 {
                   failed = 1;
@@ -276,9 +283,9 @@ find_derivation (const char *toset, const char *toset_expand,
                 struct gconv_step **handle, size_t *nsteps)
 {
   __libc_lock_define_initialized (static, lock)
-  struct derivation_step *first, *current, **lastp, *best = NULL;
-  int best_cost_hi = 0;
-  int best_cost_lo = 0;
+  struct derivation_step *first, *current, **lastp, *solution = NULL;
+  int best_cost_hi = INT_MAX;
+  int best_cost_lo = INT_MAX;
   int result;
 
   result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
@@ -299,202 +306,287 @@ find_derivation (const char *toset, const char *toset_expand,
       return result;
     }
 
-  /* ### TODO
-     For now we use a simple algorithm with quadratic runtime behaviour.
+  /* For now we use a simple algorithm with quadratic runtime behaviour.
      The task is to match the `toset' with any of the available rules,
      starting from FROMSET.  */
   if (fromset_expand != NULL)
     {
-      first = NEW_STEP (fromset_expand, NULL, NULL);
-      first->next = NEW_STEP (fromset, NULL, NULL);
+      first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
+      first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
       lastp = &first->next->next;
     }
   else
     {
-      first = NEW_STEP (fromset, NULL, NULL);
+      first = NEW_STEP (fromset, 0, 0, NULL, NULL);
       lastp = &first->next;
     }
 
-  current = first;
-  while (current != NULL)
+  for (current = first; current != NULL; current = current->next)
     {
       /* Now match all the available module specifications against the
          current charset name.  If any of them matches check whether
          we already have a derivation for this charset.  If yes, use the
          one with the lower costs.  Otherwise add the new charset at the
-         end.  */
-      size_t cnt;
-
-      for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
-        {
-         const char *result_set = NULL;
+         end.
+
+        The module database is organized in a tree form which allows to
+        search for prefixes.  So we search for the first entry with a
+        matching prefix and any other matching entry can be found from
+        this place.  */
+      struct gconv_module *node = __gconv_modules_db;
+
+      /* Maybe it is not necessary anymore to look for a solution for
+        this entry since the cost is already as high (or heigher) as
+        the cost for the best solution so far.  */
+      if (current->cost_hi > best_cost_hi
+         || (current->cost_hi == best_cost_hi
+             && current->cost_lo >= best_cost_lo))
+       continue;
+
+      while (node != NULL)
+       {
+         int cmpres = strncmp (current->result_set, node->from_constpfx,
+                               MIN (current->result_set_len,
+                                    node->from_constpfx_len));
 
-         if (__gconv_modules_db[cnt]->from_pattern == NULL)
+         if (cmpres == 0)
            {
-             if (__strcasecmp (current->result_set,
-                               __gconv_modules_db[cnt]->from_constpfx) == 0)
+             /* Walk through the list of modules with this prefix and
+                try to match the name.  */
+             struct gconv_module *runp;
+
+             if (current->result_set_len < node->from_constpfx_len)
+               /* Cannot possibly match.  */
+               break;
+
+             /* Check all the modules with this prefix.  */
+             runp = node;
+             do
                {
-                 if (strcmp (__gconv_modules_db[cnt]->to_string, "-") == 0)
-                   result_set = toset_expand ?: toset;
+                 const char *result_set = NULL;
+
+                 if (runp->from_pattern == NULL)
+                   {
+                     /* This is a simple entry and therefore we have a
+                        found an matching entry if the strings are really
+                        equal.  */
+                     if (current->result_set_len == runp->from_constpfx_len)
+                       {
+                         if (strcmp (runp->to_string, "-") == 0)
+                           result_set = toset_expand ?: toset;
+                         else
+                           result_set = runp->to_string;
+                       }
+                   }
                  else
-                   result_set = __gconv_modules_db[cnt]->to_string;
-               }
-           }
-         else
-           /* We have a regular expression.  First see if the prefix
-              matches.  */
-           if (__strncasecmp (current->result_set,
-                              __gconv_modules_db[cnt]->from_constpfx,
-                              __gconv_modules_db[cnt]->from_constpfx_len)
-               == 0)
-             {
-               /* First compile the regex if not already done.  */
-               if (__gconv_modules_db[cnt]->from_regex == NULL)
-                 {
-                   if (__regcomp (&__gconv_modules_db[cnt]->from_regex_mem,
-                                  __gconv_modules_db[cnt]->from_pattern,
-                                  REG_EXTENDED | REG_ICASE) != 0)
-                     /* Something is wrong.  Remember this.  */
-                     __gconv_modules_db[cnt]->from_regex = (regex_t *) -1L;
-                   else
-                     __gconv_modules_db[cnt]->from_regex
-                       = &__gconv_modules_db[cnt]->from_regex_mem;
-                 }
-
-               if (__gconv_modules_db[cnt]->from_regex != (regex_t *) -1L)
-                 {
-                   /* Try to match the from name.  */
-                   regmatch_t match[4];
-
-                   if (__regexec (__gconv_modules_db[cnt]->from_regex,
-                                  current->result_set, 4, match, 0) == 0
-                       && match[0].rm_so == 0
-                       && current->result_set[match[0].rm_eo] == '\0')
-                     {
-                       /* At least the whole <from> string is matched.
-                          We must now match sed-like possible
-                          subexpressions from the match to the
-                          toset expression.  */
+                   {
+                     /* Compile the regular expression if necessary.  */
+                     if (runp->from_regex == NULL)
+                       {
+                         if (__regcomp (&runp->from_regex_mem,
+                                        runp->from_pattern,
+                                        REG_EXTENDED | REG_ICASE) != 0)
+                           /* Something is wrong.  Remember this.  */
+                           runp->from_regex = (regex_t *) -1L;
+                         else
+                           runp->from_regex = &runp->from_regex_mem;
+                       }
+
+                     if (runp->from_regex != (regex_t *) -1L)
+                       {
+                         regmatch_t match[4];
+
+                         /* Try to match the regular expression.  */
+                         if (__regexec (runp->from_regex, current->result_set,
+                                        4, match, 0) == 0
+                             && match[0].rm_so == 0
+                             && current->result_set[match[0].rm_eo] == '\0')
+                           {
+                             /* At least the whole <from> string is matched.
+                                We must now match sed-like possible
+                                subexpressions from the match to the
+                                toset expression.  */
 #define ENSURE_LEN(LEN) \
   if (wp + (LEN) >= constr + len - 1)                                        \
     {                                                                        \
       char *newp = alloca (len += 128);                                              \
-      memcpy (newp, constr, wp - constr);                                    \
-      wp = newp + (wp - constr);                                             \
+      wp = __mempcpy (newp, constr, wp - constr);                            \
       constr = newp;                                                         \
     }
-                       size_t len = 128;
-                       char *constr = alloca (len);
-                       char *wp = constr;
-                       const char *cp = __gconv_modules_db[cnt]->to_string;
-
-                       while (*cp != '\0')
-                         {
-                           if (*cp != '\\')
-                             {
-                               ENSURE_LEN (1);
-                               *wp++ = *cp++;
-                             }
-                           else if (cp[1] == '\0')
-                             /* Backslash at end of string.  */
-                             break;
-                           else
-                             {
-                               ++cp;
-                               if (*cp == '\\')
-                                 {
-                                   *wp++ = *cp++;
-                                   ENSURE_LEN (1);
-                                 }
-                               else if (*cp < '1' || *cp > '3')
-                                 break;
-                               else
-                                 {
-                                   int idx = *cp - '0';
-                                   if (match[idx].rm_so == -1)
-                                     /* No match.  */
-                                     break;
-
-                                   ENSURE_LEN (match[idx].rm_eo
-                                               - match[idx].rm_so);
-                                   wp = __mempcpy (wp,
-                                                   &current->result_set[match[idx].rm_so],
-                                                   match[idx].rm_eo
-                                                   - match[idx].rm_so);
-                                   ++cp;
-                                 }
-                             }
-                         }
-                       if (*cp == '\0' && wp != constr)
-                         {
-                                /* Terminate the constructed string.  */
-                           *wp = '\0';
-                           result_set = constr;
-                         }
-                     }
-                 }
-             }
-
-         if (result_set != NULL)
-           {
-             /* We managed to find a derivation.  First see whether
-                this is what we are looking for.  */
-             if (__strcasecmp (result_set, toset) == 0
-                 || (toset_expand != NULL
-                     && __strcasecmp (result_set, toset_expand) == 0))
-               {
-                 /* Determine the costs.  If they are lower than the
-                    previous solution (or this is the first solution)
-                    remember this solution.  */
-                 int cost_hi = __gconv_modules_db[cnt]->cost_hi;
-                 int cost_lo = __gconv_modules_db[cnt]->cost_lo;
-                 struct derivation_step *runp = current;
-                 while (runp->code != NULL)
-                   {
-                     cost_hi += runp->code->cost_hi;
-                     cost_lo += runp->code->cost_lo;
-                     runp = runp->last;
+                             size_t len = 128;
+                             char *constr = alloca (len);
+                             char *wp = constr;
+                             const char *cp = runp->to_string;
+
+                             while (*cp != '\0')
+                               {
+                                 if (*cp != '\\')
+                                   {
+                                     ENSURE_LEN (1);
+                                     *wp++ = *cp++;
+                                   }
+                                 else if (cp[1] == '\0')
+                                   /* Backslash at end of string.  */
+                                   break;
+                                 else
+                                   {
+                                     ++cp;
+                                     if (*cp == '\\')
+                                       {
+                                         *wp++ = *cp++;
+                                         ENSURE_LEN (1);
+                                       }
+                                     else if (*cp < '1' || *cp > '3')
+                                       break;
+                                     else
+                                       {
+                                         int idx = *cp - '0';
+                                         if (match[idx].rm_so == -1)
+                                           /* No match.  */
+                                           break;
+
+                                         ENSURE_LEN (match[idx].rm_eo
+                                                     - match[idx].rm_so);
+                                         wp = __mempcpy (wp,
+                                                         &current->result_set[match[idx].rm_so],
+                                                         match[idx].rm_eo
+                                                         - match[idx].rm_so);
+                                         ++cp;
+                                       }
+                                   }
+                               }
+                             if (*cp == '\0' && wp != constr)
+                               {
+                                 /* Terminate the constructed string.  */
+                                 *wp = '\0';
+                                 result_set = constr;
+                               }
+                           }
+                       }
                    }
-                 if (best == NULL || cost_hi < best_cost_hi
-                     || (cost_hi == best_cost_hi && cost_lo < best_cost_lo))
+
+                 if (result_set != NULL)
                    {
-                     best = NEW_STEP (result_set, __gconv_modules_db[cnt],
-                                      current);
-                     best_cost_hi = cost_hi;
-                     best_cost_lo = cost_lo;
+                     int cost_hi = runp->cost_hi + current->cost_hi;
+                     int cost_lo = runp->cost_lo + current->cost_lo;
+                     struct derivation_step *step;
+
+                     /* We managed to find a derivation.  First see whether
+                        this is what we are looking for.  */
+                     if (__strcasecmp (result_set, toset) == 0
+                         || (toset_expand != NULL
+                             && __strcasecmp (result_set, toset_expand) == 0))
+                       {
+                         if (solution == NULL || cost_hi < best_cost_hi
+                             || (cost_hi == best_cost_hi
+                                 && cost_lo < best_cost_lo))
+                           {
+                             best_cost_hi = cost_hi;
+                             best_cost_lo = cost_lo;
+                           }
+
+                         /* Append this solution to list.  */
+                         if (solution == NULL)
+                           solution = NEW_STEP (result_set, 0, 0, runp,
+                                                current);
+                         else
+                           {
+                             while (solution->next != NULL)
+                               solution = solution->next;
+
+                             solution->next = NEW_STEP (result_set, 0, 0,
+                                                        runp, current);
+                           }
+                       }
+                     else if (cost_hi < best_cost_hi
+                              || (cost_hi == best_cost_hi
+                                  && cost_lo < best_cost_lo))
+                       {
+                         /* Append at the end if there is no entry with
+                            this name.  */
+                         for (step = first; step != NULL; step = step->next)
+                           if (__strcasecmp (result_set, step->result_set)
+                               == 0)
+                             break;
+
+                         if (step == NULL)
+                           {
+                             *lastp = NEW_STEP (result_set,
+                                                cost_hi, cost_lo,
+                                                runp, current);
+                             lastp = &(*lastp)->next;
+                           }
+                         else if (step->cost_hi > cost_hi
+                                  || (step->cost_hi == cost_hi
+                                      && step->cost_lo > cost_lo))
+                           {
+                             step->code = runp;
+                             step->last = current;
+
+                             /* Update the cost for all steps.  */
+                             for (step = first; step != NULL;
+                                  step = step->next)
+                               {
+                                 struct derivation_step *back;
+
+                                 if (step->code == NULL)
+                                   /* This is one of the entries we started
+                                      from.  */
+                                   continue;
+
+                                 step->cost_hi = step->code->cost_hi;
+                                 step->cost_lo = step->code->cost_lo;
+
+                                 for (back = step->last; back->code != NULL;
+                                      back = back->last)
+                                   {
+                                     step->cost_hi += back->code->cost_hi;
+                                     step->cost_lo += back->code->cost_lo;
+                                   }
+                               }
+
+                             for (step = solution; step != NULL;
+                                  step = step->next)
+                               {
+                                 step->cost_hi = (step->code->cost_hi
+                                                  + step->last->cost_hi);
+                                 step->cost_lo = (step->code->cost_lo
+                                                  + step->last->cost_lo);
+
+                                 if (step->cost_hi < best_cost_hi
+                                     || (step->cost_hi == best_cost_hi
+                                         && step->cost_lo < best_cost_lo))
+                                   {
+                                     solution = step;
+                                     best_cost_hi = step->cost_hi;
+                                     best_cost_lo = step->cost_lo;
+                                   }
+                               }
+                           }
+                       }
                    }
+
+                 runp = runp->same;
                }
-             else
-               {
-                 /* Append at the end if there is no entry with this name.  */
-                 struct derivation_step *runp = first;
+             while (runp != NULL);
 
-                 while (runp != NULL)
-                   {
-                     if (__strcasecmp (result_set, runp->result_set) == 0)
-                       break;
-                     runp = runp->next;
-                   }
+             if (current->result_set_len == node->from_constpfx_len)
+               break;
 
-                 if (runp == NULL)
-                   {
-                     *lastp = NEW_STEP (result_set, __gconv_modules_db[cnt],
-                                        current);
-                     lastp = &(*lastp)->next;
-                   }
-                }
+             node = node->matching;
            }
+         else if (cmpres < 0)
+           node = node->left;
+         else
+           node = node->right;
        }
-
-      /* Go on with the next entry.  */
-      current = current->next;
     }
 
-  if (best != NULL)
+  if (solution != NULL)
     /* We really found a way to do the transformation.  Now build a data
        structure describing the transformation steps.*/
-    result = gen_steps (best, toset_expand ?: toset, fromset_expand ?: fromset,
-                       handle, nsteps);
+    result = gen_steps (solution, toset_expand ?: toset,
+                       fromset_expand ?: fromset, handle, nsteps);
   else
     {
       /* We haven't found a transformation.  Clear the result values.  */
@@ -620,20 +712,35 @@ __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
 }
 
 
+/* Free the modules mentioned.  */
+static void
+internal_function
+free_modules_db (struct gconv_module *node)
+{
+  if (node->left != NULL)
+    free_modules_db (node->left);
+  if (node->right != NULL)
+    free_modules_db (node->right);
+  if (node->same != NULL)
+    free_modules_db (node->same);
+  do
+    {
+      struct gconv_module *act = node;
+      node = node->matching;
+      free (act);
+    }
+  while (node != NULL);
+}
+
+
 /* Free all resources if necessary.  */
 static void __attribute__ ((unused))
 free_mem (void)
 {
-  size_t cnt;
-
   if (__gconv_alias_db != NULL)
     __tdestroy (__gconv_alias_db, free);
 
-  for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
-    /* Modules which names do not start with a slash are builtin
-       transformations and the memory is not allocated dynamically.  */
-    if (__gconv_modules_db[cnt]->module_name[0] == '/')
-      free (__gconv_modules_db[cnt]);
+  free_modules_db (__gconv_modules_db);
 
   if (known_derivations != NULL)
     __tdestroy (known_derivations, free_derivation);
index bc67b0b..c78d0bb 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+/* Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -75,6 +75,11 @@ struct gconv_module
   int cost_lo;
 
   const char *module_name;
+
+  struct gconv_module *left;   /* Prefix smaller.  */
+  struct gconv_module *same;   /* List of entries with identical prefix.  */
+  struct gconv_module *matching;/* Next node with more specific prefix.  */
+  struct gconv_module *right;  /* Prefix larger.  */
 };
 
 
@@ -85,7 +90,7 @@ extern void *__gconv_alias_db;
 
 /* Array with available modules.  */
 extern size_t __gconv_nmodules;
-extern struct gconv_module **__gconv_modules_db;
+extern struct gconv_module *__gconv_modules_db;
 
 
 /* Return in *HANDLE decriptor for transformation from FROMSET to TOSET.  */
@@ -102,7 +107,7 @@ extern int __gconv_close (gconv_t cd)
    bytes in buffer starting at *OUTBUF.  Return number of written
    characters in *CONVERTED if this pointer is not null.  */
 extern int __gconv (gconv_t __cd, const char **__inbuf, const char *inbufend,
-                   char **__outbuf, char *outbufend, size_t *__converted)
+                   char **__outbuf, char *outbufend, size_t *converted)
      internal_function;
 
 /* Return in *HANDLE a pointer to an array with *NSTEPS elements describing
@@ -152,7 +157,6 @@ extern void __gconv_get_builtin_trans (const char *__name,
                   const char **__inbuf, const char *__inbufend,              \
                   size_t *__written, int __do_flush)
 
-__BUILTIN_TRANS (__gconv_transform_dummy);
 __BUILTIN_TRANS (__gconv_transform_ascii_internal);
 __BUILTIN_TRANS (__gconv_transform_internal_ascii);
 __BUILTIN_TRANS (__gconv_transform_utf8_internal);
index c71c5ed..4084d04 100644 (file)
@@ -46,38 +46,6 @@ static const unsigned char encoding_byte[] =
 };
 
 
-
-int
-__gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data,
-                        const char **inbuf, const char *inbufend,
-                        size_t *written, int do_flush)
-{
-  size_t do_write;
-
-  /* We have no stateful encoding.  So we don't have to do anything
-     special.  */
-  if (do_flush)
-    do_write = 0;
-  else
-    {
-      do_write = MIN (inbufend - *inbuf, data->outbufend - data->outbuf);
-
-      memcpy (data->outbuf, inbuf, do_write);
-
-      *inbuf -= do_write;
-      *data->outbuf += do_write;
-    }
-
-  /* ### TODO Actually, this number must be devided according to the
-     size of the input charset.  I.e., if the input is in UCS4 the
-     number of copied bytes must be divided by 4.  */
-  if (written != NULL)
-    *written = do_write;
-
-  return GCONV_OK;
-}
-
-
 /* Transform from the internal, UCS4-like format, to UCS4.  The
    difference between the internal ucs4 format and the real UCS4
    format is, if any, the endianess.  The Unicode/ISO 10646 says that
index 8f05d57..78fa623 100644 (file)
@@ -99,7 +99,7 @@ static int process_block (iconv_t cd, const char *addr, size_t len,
                          FILE *output);
 static int process_fd (iconv_t cd, int fd, FILE *output);
 static int process_file (iconv_t cd, FILE *input, FILE *output);
-static void print_known_names (void);
+static void print_known_names (void) internal_function;
 
 
 int
@@ -503,9 +503,38 @@ do_print  (const void *nodep, VISIT value, int level)
 }
 
 static void
+internal_function
+add_known_names (struct gconv_module *node)
+{
+  if (node->left != NULL)
+    add_known_names (node->left);
+  if (node->right != NULL)
+    add_known_names (node->right);
+  if (node->same != NULL)
+    add_known_names (node->same);
+  do
+    {
+      if (node->from_pattern == NULL)
+       {
+         if (strcmp (node->from_constpfx, "INTERNAL"))
+           tsearch (node->from_constpfx, &printlist,
+                    (__compar_fn_t) strverscmp);
+         if (strcmp (node->to_string, "INTERNAL"))
+           tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp);
+       }
+      else
+       if (strcmp (node->from_pattern, "INTERNAL"))
+         tsearch (node->from_pattern, &printlist, (__compar_fn_t) strverscmp);
+
+      node = node->matching;
+    }
+  while (node != NULL);
+}
+
+static void
+internal_function
 print_known_names (void)
 {
-  size_t cnt;
   iconv_t h;
 
   /* We must initialize the internal databases first.  */
@@ -516,22 +545,7 @@ print_known_names (void)
   twalk (__gconv_alias_db, insert_print_list);
 
   /* Add the from- and to-names from the known modules.  */
-  for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
-    {
-      if (__gconv_modules_db[cnt]->from_pattern == NULL)
-       {
-         if (strcmp (__gconv_modules_db[cnt]->from_constpfx, "INTERNAL"))
-           tsearch (__gconv_modules_db[cnt]->from_constpfx, &printlist,
-                    (__compar_fn_t) strverscmp);
-         if (strcmp (__gconv_modules_db[cnt]->to_string, "INTERNAL"))
-           tsearch (__gconv_modules_db[cnt]->to_string, &printlist,
-                    (__compar_fn_t) strverscmp);
-       }
-      else
-       if (strcmp (__gconv_modules_db[cnt]->from_pattern, "INTERNAL"))
-         tsearch (__gconv_modules_db[cnt]->from_pattern, &printlist,
-                  (__compar_fn_t) strverscmp);
-    }
+  add_known_names (__gconv_modules_db);
 
   fputs (_("\
 The following list contain all the coded character sets known.  This does\n\
index e9d63f3..a3a907c 100644 (file)
@@ -1,5 +1,7 @@
 # File name    Caller  Syscall name    # args  Strong name     Weak names
 
+vfork          -       vfork           0       vfork
+
 s_getgroups    getgroups getgroups     2       __syscall_getgroups
 s_getpagesize  getpagesize getpagesize 0       __syscall_getpagesize
 s_llseek       llseek  _llseek         5       __syscall__llseek
@@ -37,7 +39,6 @@ s_sigprocmask sigprocmask sigprocmask 3       __syscall_sigprocmask
 s_sigsuspend   sigsuspend sigsuspend   3       __syscall_sigsuspend
 s_sysctl       sysctl  _sysctl         1       __syscall__sysctl
 s_ustat                ustat   ustat           2       __syscall_ustat
-s_vfork                vfork   vfork           0       __syscall_vfork
 sys_fstat      fxstat  fstat           2       __syscall_fstat
 sys_lstat      lxstat  lstat           2       __syscall_lstat
 sys_mknod      xmknod  mknod           3       __syscall_mknod
index 0d76d25..bf4303b 100644 (file)
@@ -11,6 +11,7 @@ setrlimit     -       setrlimit       2       setrlimit       setrlimit64
 ftruncate      -       ftruncate       2       __ftruncate     ftruncate ftruncate64
 truncate       -       truncate        2       truncate        truncate64
 mmap           -       mmap            6       __mmap          mmap __mmap64 mmap64
+vfork          -       vfork           0       vfork
 
 # Override select.S in parent directory:
 select         -       select          5       __select        select
@@ -56,7 +57,6 @@ s_sigprocmask sigprocmask sigprocmask 3       __syscall_sigprocmask
 s_sigsuspend   sigsuspend sigsuspend   3       __syscall_sigsuspend
 s_sysctl       sysctl  _sysctl         1       __syscall__sysctl
 s_ustat                ustat   ustat           2       __syscall_ustat
-s_vfork                vfork   vfork           0       __syscall_vfork
 sys_fstat      fxstat  fstat           2       __syscall_fstat
 sys_lstat      lxstat  lstat           2       __syscall_lstat
 sys_mknod      xmknod  mknod           3       __syscall_mknod