Update.
author Ulrich Drepper <drepper@redhat.com>
Tue, 5 Sep 2000 02:41:25 +0000 (02:41 +0000)
committer Ulrich Drepper <drepper@redhat.com>
Tue, 5 Sep 2000 02:41:25 +0000 (02:41 +0000)
2000-09-03  Bruno Haible  <haible@clisp.cons.org>

* charmaps/EUC-TW: Add commented non-reversible mappings.

2000-09-03  Bruno Haible  <haible@clisp.cons.org>

* charmaps/CP949: New file.

2000-09-03  Bruno Haible  <haible@clisp.cons.org>

* charmaps/GB2312: Remove 0x80..0xA0, 0xAA..0xAF, 0xF8..0xFF.

2000-09-03  Bruno Haible  <haible@clisp.cons.org>

* charmaps/EUC-JP: Nonreversibly map 0xA1C0 to U+005C and 0x8FA2B7 to
U+007E.

28 files changed:
elf/Makefile
iconv/gconv_builtin.c
iconv/gconv_db.c
iconv/gconv_dl.c
iconv/gconv_int.h
iconv/gconv_simple.c
iconv/gconv_trans.c
iconvdata/EUC-JP.irreversible [new file with mode: 0644]
iconvdata/ISIRI-3342.irreversible [new file with mode: 0644]
iconvdata/Makefile
iconvdata/SJIS.irreversible [new file with mode: 0644]
iconvdata/big5.c
iconvdata/cns11643.h
iconvdata/cns11643l1.c
iconvdata/cns11643l1.h
iconvdata/euc-tw.c
iconvdata/iso_6937-2.c
iconvdata/jis0208.c
iconvdata/jis0212.c
iconvdata/sjis.c
iconvdata/tst-table-charmap.sh [new file with mode: 0755]
iconvdata/tst-table-from.c [new file with mode: 0644]
iconvdata/tst-table-to.c [new file with mode: 0644]
iconvdata/tst-table.sh [new file with mode: 0755]
iconvdata/tst-tables.sh [new file with mode: 0755]
linuxthreads/attr.c
localedata/ChangeLog
sysdeps/ia64/Makefile

index 9f9fef9..ab71adf 100644 (file)
@@ -29,12 +29,15 @@ routines    = $(dl-routines) dl-open dl-close dl-symbol dl-support \
 dl-routines    = $(addprefix dl-,load cache lookup object reloc deps \
                                  runtime error init fini debug misc \
                                  version profile)
+all-dl-routines = $(dl-routines) $(sysdep-dl-routines)
 # But they are absent from the shared libc, because that code is in ld.so.
-elide-routines.os = $(dl-routines) dl-support enbl-secure
+elide-routines.os = $(all-dl-routines) dl-support enbl-secure
 
 # ld.so uses those routines, plus some special stuff for being the program
 # interpreter and operating independent of libc.
 rtld-routines  := rtld $(dl-routines) dl-sysdep dl-environ dl-minimal
+all-rtld-routines = $(rtld-routines) $(sysdep-dl-routines)
+
 distribute     := $(rtld-routines:=.c) dynamic-link.h do-rel.h dl-machine.h \
                   dl-cache.h dl-hash.h soinit.c sofini.c ldd.bash.in \
                   genrtldtbl.awk atomicity.h dl-procinfo.h ldsodefs.h \
@@ -60,7 +63,7 @@ ld-map                = $(common-objpfx)ld.map
 endif
 
 ifeq (yes,$(build-shared))
-extra-objs     = $(rtld-routines:=.os) soinit.os sofini.os interp.os
+extra-objs     = $(all-rtld-routines:%=%.os) soinit.os sofini.os interp.os
 generated      += librtld.os dl-allobjs.os ld.so ldd
 install-others = $(inst_slibdir)/$(rtld-installed-name)
 install-bin    = ldd
@@ -118,7 +121,7 @@ endif
 # Command to link into a larger single relocatable object.
 reloc-link = $(LINK.o) -nostdlib -nostartfiles -r -o $@
 
-$(objpfx)dl-allobjs.os: $(rtld-routines:%=$(objpfx)%.os)
+$(objpfx)dl-allobjs.os: $(all-rtld-routines:%=$(objpfx)%.os)
        $(reloc-link) $^
 
 # Link together the dynamic linker into a single relocatable object.
index 7a2072d..a530c82 100644 (file)
@@ -75,7 +75,6 @@ __gconv_get_builtin_trans (const char *name, struct __gconv_step *step)
   step->__fct = map[cnt].fct;
   step->__init_fct = map[cnt].init;
   step->__end_fct = map[cnt].end;
-  step->__counter = INT_MAX;
   step->__shlib_handle = NULL;
   step->__modname = NULL;
 
index ed2698a..dd51670 100644 (file)
@@ -163,7 +163,8 @@ free_derivation (void *p)
   size_t cnt;
 
   for (cnt = 0; cnt < deriv->nsteps; ++cnt)
-    if (deriv->steps[cnt].__end_fct)
+    if (deriv->steps[cnt].__counter > 0
+       && deriv->steps[cnt].__end_fct != NULL)
       DL_CALL_FCT (deriv->steps[cnt].__end_fct, (&deriv->steps[cnt]));
 
   /* Free the name strings.  */
@@ -175,6 +176,28 @@ free_derivation (void *p)
 }
 
 
+/* Decrement the reference count for a single step in a steps array.  */
+static inline void
+release_step (struct __gconv_step *step)
+{
+  if (--step->__counter == 0)
+    {
+      /* Call the destructor.  */
+      if (step->__end_fct != NULL)
+       DL_CALL_FCT (step->__end_fct, (step));
+
+#ifndef STATIC_GCONV
+      /* Skip builtin modules; they are not reference counted.  */
+      if (step->__shlib_handle != NULL)
+       {
+         /* Release the loaded module.  */
+         __gconv_release_shlib (step->__shlib_handle);
+         step->__shlib_handle = NULL;
+       }
+#endif
+    }
+}
+
 static int
 internal_function
 gen_steps (struct derivation_step *best, const char *toset,
@@ -222,7 +245,6 @@ gen_steps (struct derivation_step *best, const char *toset,
 
              result[step_cnt].__shlib_handle = shlib_handle;
              result[step_cnt].__modname = shlib_handle->name;
-             result[step_cnt].__counter = 1;
              result[step_cnt].__fct = shlib_handle->fct;
              result[step_cnt].__init_fct = shlib_handle->init_fct;
              result[step_cnt].__end_fct = shlib_handle->end_fct;
@@ -233,6 +255,8 @@ gen_steps (struct derivation_step *best, const char *toset,
            __gconv_get_builtin_trans (current->code->module_name,
                                       &result[step_cnt]);
 
+         result[step_cnt].__counter = 1;
+
          /* Call the init function.  */
          result[step_cnt].__data = NULL;
          if (result[step_cnt].__init_fct != NULL)
@@ -245,6 +269,7 @@ gen_steps (struct derivation_step *best, const char *toset,
                   failed = 1;
                   /* Make sure we unload this modules.  */
                   --step_cnt;
+                  result[step_cnt].__end_fct = NULL;
                   break;
                 }
             }
@@ -256,13 +281,7 @@ gen_steps (struct derivation_step *best, const char *toset,
        {
          /* Something went wrong while initializing the modules.  */
          while (++step_cnt < *nsteps)
-           {
-             if (result[step_cnt].__end_fct != NULL)
-               DL_CALL_FCT (result[step_cnt].__end_fct, (&result[step_cnt]));
-#ifndef STATIC_GCONV
-             __gconv_release_shlib (result[step_cnt].__shlib_handle);
-#endif
-           }
+           release_step (&result[step_cnt]);
          free (result);
          *nsteps = 0;
          *handle = NULL;
@@ -292,29 +311,38 @@ increment_counter (struct __gconv_step *steps, size_t nsteps)
   int result = __GCONV_OK;
 
   while (cnt-- > 0)
-    if (steps[cnt].__counter++ == 0)
-      {
-       steps[cnt].__shlib_handle =
-         __gconv_find_shlib (steps[cnt].__modname);
-       if (steps[cnt].__shlib_handle == NULL)
-         {
-           /* Oops, this is the second time we use this module (after
-              unloading) and this time loading failed!?  */
-           while (++cnt < nsteps)
-             __gconv_release_shlib (steps[cnt].__shlib_handle);
-           result = __GCONV_NOCONV;
-           break;
-         }
-
-       steps[cnt].__init_fct = steps[cnt].__shlib_handle->init_fct;
-       steps[cnt].__fct = steps[cnt].__shlib_handle->fct;
-       steps[cnt].__end_fct = steps[cnt].__shlib_handle->end_fct;
-
-       if (steps[cnt].__end_fct != NULL)
-         DL_CALL_FCT (steps[cnt].__end_fct, (&steps[cnt]));
-       if (steps[cnt].__init_fct != NULL)
-         DL_CALL_FCT (steps[cnt].__init_fct, (&steps[cnt]));
-      }
+    {
+      struct __gconv_step *step = &steps[cnt];
+
+      if (step->__counter++ == 0)
+       {
+         /* Skip builtin modules.  */
+         if (step->__modname != NULL)
+           {
+             /* Reopen a previously used module.  */
+             step->__shlib_handle = __gconv_find_shlib (step->__modname);
+             if (step->__shlib_handle == NULL)
+               {
+                 /* Oops, this is the second time we use this module
+                    (after unloading) and this time loading failed!?  */
+                 --step->__counter;
+                 while (++cnt < nsteps)
+                   release_step (&steps[cnt]);
+                 result = __GCONV_NOCONV;
+                 break;
+               }
+
+             /* The function addresses defined by the module may
+                have changed.  */
+             step->__fct = step->__shlib_handle->fct;
+             step->__init_fct = step->__shlib_handle->init_fct;
+             step->__end_fct = step->__shlib_handle->end_fct;
+           }
+
+         if (step->__init_fct != NULL)
+           DL_CALL_FCT (step->__init_fct, (step));
+       }
+    }
   return result;
 }
 #endif
@@ -333,9 +361,8 @@ find_derivation (const char *toset, const char *toset_expand,
   int best_cost_lo = INT_MAX;
   int result;
 
-  /* There is a small chance that this derivation is meanwhile found.  This
-     can happen if in `find_derivation' we look for this derivation, didn't
-     find it but at the same time another thread looked for this derivation. */
+  /* Look whether an earlier call to `find_derivation' has already
+     computed a possible derivation.  If so, return it immediately.  */
   result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
                              handle, nsteps);
   if (result == __GCONV_OK)
@@ -346,9 +373,32 @@ find_derivation (const char *toset, const char *toset_expand,
       return result;
     }
 
-  /* For now we use a simple algorithm with quadratic runtime behaviour.
-     The task is to match the `toset' with any of the available rules,
-     starting from FROMSET.  */
+  /* The task is to find a sequence of transformations, backed by the
+     existing modules - whether builtin or dynamically loadable -,
+     starting at `fromset' (or `fromset_expand') and ending at `toset'
+     (or `toset_expand'), and with minimal cost.
+
+     For computer scientists, this is a shortest path search in the
+     graph where the nodes are all possible charsets and the edges are
+     the transformations listed in __gconv_modules_db.
+
+     For now we use a simple algorithm with quadratic runtime behaviour.
+     It is a breadth-first search, starting at `fromset' and `fromset_expand'.
+     The list starting at `first' contains all nodes that have been
+     visited up to now, in the order in which they have been visited --
+     excluding the goal nodes `toset' and `toset_expand' which get
+     managed in the list starting at `solution'.
+     `current' walks through the list starting at `first' and looks
+     which nodes are reachable from the current node, adding them to
+     the end of the list [`first' or `solution' respectively] (if
+     they are visited the first time) or updating them in place (if
+     they have already been visited).
+     In each node of either list, cost_lo and cost_hi contain the
+     minimum cost over any paths found up to now, starting at `fromset'
+     or `fromset_expand', ending at that node.  best_cost_lo and
+     best_cost_hi represent the minimum over the elements of the
+     `solution' list.  */
+
   if (fromset_expand != NULL)
     {
       first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
@@ -373,16 +423,17 @@ find_derivation (const char *toset, const char *toset_expand,
         searching for prefixes.  So we search for the first entry with a
         matching prefix and any other matching entry can be found from
         this place.  */
-      struct gconv_module *node = __gconv_modules_db;
+      struct gconv_module *node;
 
       /* Maybe it is not necessary anymore to look for a solution for
-        this entry since the cost is already as high (or heigher) as
+        this entry since the cost is already as high (or higher) as
         the cost for the best solution so far.  */
       if (current->cost_hi > best_cost_hi
          || (current->cost_hi == best_cost_hi
              && current->cost_lo >= best_cost_lo))
        continue;
 
+      node = __gconv_modules_db;
       while (node != NULL)
        {
          int cmpres = strcmp (current->result_set, node->from_string);
@@ -404,37 +455,52 @@ find_derivation (const char *toset, const char *toset_expand,
                  struct derivation_step *step;
 
                  /* We managed to find a derivation.  First see whether
-                    this is what we are looking for.  */
+                    we have reached one of the goal nodes.  */
                  if (strcmp (result_set, toset) == 0
                      || (toset_expand != NULL
                          && strcmp (result_set, toset_expand) == 0))
                    {
-                     if (solution == NULL || cost_hi < best_cost_hi
+                     /* Append to the `solution' list if there
+                        is no entry with this name.  */
+                     for (step = solution; step != NULL; step = step->next)
+                       if (strcmp (result_set, step->result_set) == 0)
+                         break;
+
+                     if (step == NULL)
+                       {
+                         step = NEW_STEP (result_set,
+                                          cost_hi, cost_lo,
+                                          runp, current);
+                         step->next = solution;
+                         solution = step;
+                       }
+                     else if (step->cost_hi > cost_hi
+                              || (step->cost_hi == cost_hi
+                                  && step->cost_lo > cost_lo))
+                       {
+                         /* A better path was found for the node,
+                            on the `solution' list.  */
+                         step->code = runp;
+                         step->last = current;
+                         step->cost_hi = cost_hi;
+                         step->cost_lo = cost_lo;
+                       }
+
+                     /* Update best_cost accordingly.  */
+                     if (cost_hi < best_cost_hi
                          || (cost_hi == best_cost_hi
                              && cost_lo < best_cost_lo))
                        {
                          best_cost_hi = cost_hi;
                          best_cost_lo = cost_lo;
                        }
-
-                     /* Append this solution to list.  */
-                     if (solution == NULL)
-                       solution = NEW_STEP (result_set, 0, 0, runp, current);
-                     else
-                       {
-                         while (solution->next != NULL)
-                           solution = solution->next;
-
-                         solution->next = NEW_STEP (result_set, 0, 0,
-                                                    runp, current);
-                       }
                    }
                  else if (cost_hi < best_cost_hi
                           || (cost_hi == best_cost_hi
                               && cost_lo < best_cost_lo))
                    {
-                     /* Append at the end if there is no entry with
-                        this name.  */
+                     /* Append at the end of the `first' list if there
+                        is no entry with this name.  */
                      for (step = first; step != NULL; step = step->next)
                        if (strcmp (result_set, step->result_set) == 0)
                          break;
@@ -450,31 +516,36 @@ find_derivation (const char *toset, const char *toset_expand,
                               || (step->cost_hi == cost_hi
                                   && step->cost_lo > cost_lo))
                        {
+                         /* A better path was found for the node,
+                            on the `first' list.  */
                          step->code = runp;
                          step->last = current;
 
                          /* Update the cost for all steps.  */
                          for (step = first; step != NULL;
                               step = step->next)
-                           {
-                             struct derivation_step *back;
-
-                             if (step->code == NULL)
-                               /* This is one of the entries we started
-                                  from.  */
-                               continue;
-
-                             step->cost_hi = step->code->cost_hi;
-                             step->cost_lo = step->code->cost_lo;
-
-                             for (back = step->last; back->code != NULL;
-                                  back = back->last)
-                               {
-                                 step->cost_hi += back->code->cost_hi;
-                                 step->cost_lo += back->code->cost_lo;
-                               }
-                           }
-
+                           /* But don't update the start nodes.  */
+                           if (step->code != NULL)
+                             {
+                               struct derivation_step *back;
+                               int hi, lo;
+
+                               hi = step->code->cost_hi;
+                               lo = step->code->cost_lo;
+
+                               for (back = step->last; back->code != NULL;
+                                    back = back->last)
+                                 {
+                                   hi += back->code->cost_hi;
+                                   lo += back->code->cost_lo;
+                                 }
+
+                               step->cost_hi = hi;
+                               step->cost_lo = lo;
+                             }
+
+                         /* Likewise for the nodes on the solution list.
+                            Also update best_cost accordingly.  */
                          for (step = solution; step != NULL;
                               step = step->next)
                            {
@@ -487,7 +558,6 @@ find_derivation (const char *toset, const char *toset_expand,
                                  || (step->cost_hi == best_cost_hi
                                      && step->cost_lo < best_cost_lo))
                                {
-                                 solution = step;
                                  best_cost_hi = step->cost_hi;
                                  best_cost_lo = step->cost_lo;
                                }
@@ -509,10 +579,26 @@ find_derivation (const char *toset, const char *toset_expand,
     }
 
   if (solution != NULL)
-    /* We really found a way to do the transformation.  Now build a data
-       structure describing the transformation steps.*/
-    result = gen_steps (solution, toset_expand ?: toset,
-                       fromset_expand ?: fromset, handle, nsteps);
+    {
+      /* We really found a way to do the transformation.  */
+
+      /* Choose the best solution.  This is easy because we know that
+        the solution list has at most length 2 (one for every possible
+        goal node).  */
+      if (solution->next != NULL)
+       {
+         struct derivation_step *solution2 = solution->next;
+
+         if (solution2->cost_hi < solution->cost_hi
+             || (solution2->cost_hi == solution->cost_hi
+                 && solution2->cost_lo < solution->cost_lo))
+           solution = solution2;
+       }
+
+      /* Now build a data structure describing the transformation steps.  */
+      result = gen_steps (solution, toset_expand ?: toset,
+                         fromset_expand ?: fromset, handle, nsteps);
+    }
   else
     {
       /* We haven't found a transformation.  Clear the result values.  */
@@ -609,14 +695,7 @@ __gconv_close_transform (struct __gconv_step *steps, size_t nsteps)
   __libc_lock_lock (lock);
 
   while (nsteps-- > 0)
-    if (steps[nsteps].__shlib_handle != NULL
-       && --steps[nsteps].__counter == 0)
-      {
-       result = __gconv_release_shlib (steps[nsteps].__shlib_handle);
-       if (result != __GCONV_OK)
-         break;
-       steps[nsteps].__shlib_handle = NULL;
-      }
+    release_step (&steps[nsteps]);
 
   /* Release the lock.  */
   __libc_lock_unlock (lock);
index d07f84e..308db52 100644 (file)
@@ -164,7 +164,9 @@ do_release_shlib (const void *nodep, VISIT value, int level)
     }
   else if (obj->counter <= 0)
     {
-      if (--obj->counter < -TRIES_BEFORE_UNLOAD && obj->handle != NULL)
+      if (obj->counter >= -TRIES_BEFORE_UNLOAD)
+       --obj->counter;
+      if (obj->counter < -TRIES_BEFORE_UNLOAD && obj->handle != NULL)
        {
          /* Unload the shared object.  */
          __libc_dlclose (obj->handle);
@@ -175,7 +177,7 @@ do_release_shlib (const void *nodep, VISIT value, int level)
 
 
 /* Notify system that a shared object is not longer needed.  */
-int
+void
 internal_function
 __gconv_release_shlib (struct __gconv_loaded_object *handle)
 {
@@ -186,8 +188,6 @@ __gconv_release_shlib (struct __gconv_loaded_object *handle)
      with release counts <= 0.  This way we can finally unload them
      if necessary.  */
   __twalk (loaded, do_release_shlib);
-
-  return __GCONV_OK;
 }
 
 
index 8333a21..ad2d6e7 100644 (file)
@@ -55,7 +55,7 @@ struct gconv_alias
    object is also handled.  */
 struct __gconv_loaded_object
 {
-  /* Name of the object.  */
+  /* Name of the object.  It must be the first structure element.  */
   const char *name;
 
   /* Reference counter for the db functionality.  If no conversion is
@@ -201,7 +201,7 @@ extern struct __gconv_loaded_object *__gconv_find_shlib (const char *name)
 
 /* Release shared object.  If no further reference is available unload
    the object.  */
-extern int __gconv_release_shlib (struct __gconv_loaded_object *handle)
+extern void __gconv_release_shlib (struct __gconv_loaded_object *handle)
      internal_function;
 
 /* Fill STEP with information about builtin module with NAME.  */
index 9376b15..a41e1b5 100644 (file)
 #endif
 
 
-/* These are definitions used by some of the functions for handling
-   UTF-8 encoding below.  */
-static const uint32_t encoding_mask[] =
-{
-  ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
-};
-
-static const unsigned char encoding_byte[] =
-{
-  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
-};
-
-
 /* Transform from the internal, UCS4-like format, to UCS4.  The
    difference between the internal ucs4 format and the real UCS4
    format is, if any, the endianess.  The Unicode/ISO 10646 says that
@@ -856,7 +843,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
        char *start;                                                          \
                                                                              \
        for (step = 2; step < 6; ++step)                                      \
-         if ((wc & encoding_mask[step - 2]) == 0)                            \
+         if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0)                   \
            break;                                                            \
                                                                              \
        if (__builtin_expect (outptr + step > outend, 0))                     \
@@ -867,7 +854,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
          }                                                                   \
                                                                              \
        start = outptr;                                                       \
-       *outptr = encoding_byte[step - 2];                                    \
+       *outptr = (unsigned char) (~0xff >> step);                            \
        outptr += step;                                                       \
        --step;                                                               \
        do                                                                    \
index 8c658b1..4a42a35 100644 (file)
@@ -330,6 +330,7 @@ __gconv_translit_find (struct trans_struct *trans)
            {
              /* Copy the data.  */
              *trans = (*found)->info;
+             (*found)->open_count++;
              res = 0;
            }
        }
@@ -345,7 +346,7 @@ __gconv_translit_find (struct trans_struct *trans)
        __gconv_get_path ();
 
       /* See whether we have to append .so.  */
-      if (name_len <= 3 || memcmp (&trans->name[name_len - 3], ".so", 3) != 0)
+      if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0)
        need_so = 1;
 
       /* Create a new entry.  */
@@ -366,7 +367,7 @@ __gconv_translit_find (struct trans_struct *trans)
 
          newp->fname = cp;
 
-         /* Seach in all the directories.  */
+         /* Search in all the directories.  */
          for (runp = __gconv_path_elem; runp->name != NULL; ++runp)
            {
              cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name),
@@ -382,6 +383,9 @@ __gconv_translit_find (struct trans_struct *trans)
                }
            }
 
+         if (res)
+           newp->fname = NULL;
+
          /* In any case we'll add the entry to our search tree.  */
          if (__tsearch (newp, &search_tree, trans_compare) == NULL)
            {
diff --git a/iconvdata/EUC-JP.irreversible b/iconvdata/EUC-JP.irreversible
new file mode 100644 (file)
index 0000000..e4c0ed5
--- /dev/null
@@ -0,0 +1,6 @@
+0x5C   0x00A5
+0x7E   0x203E
+0x8FA2B7       0x007E
+0x8FA2B7       0xFF5E
+0xA1C0 0x005C
+0xA1C0 0xFF3C
diff --git a/iconvdata/ISIRI-3342.irreversible b/iconvdata/ISIRI-3342.irreversible
new file mode 100644 (file)
index 0000000..131393b
--- /dev/null
@@ -0,0 +1,52 @@
+0x80   0x0000
+0x81   0x0001
+0x82   0x0002
+0x83   0x0003
+0x84   0x0004
+0x85   0x0005
+0x86   0x0006
+0x87   0x0007
+0x88   0x0008
+0x89   0x0009
+0x8A   0x000A
+0x8B   0x000B
+0x8C   0x000C
+0x8D   0x000D
+0x8E   0x000E
+0x8F   0x000F
+0x90   0x0010
+0x91   0x0011
+0x92   0x0012
+0x93   0x0013
+0x94   0x0014
+0x95   0x0015
+0x96   0x0016
+0x97   0x0017
+0x98   0x0018
+0x99   0x0019
+0x9A   0x001A
+0x9B   0x001B
+0x9C   0x001C
+0x9D   0x001D
+0x9E   0x001E
+0x9F   0x001F
+0xA0   0x0020
+0xA3   0x0021
+0xA6   0x002E
+0xA8   0x0029
+0xA9   0x0028
+0xAB   0x002B
+0xAD   0x002D
+0xAF   0x002F
+0xBA   0x003A
+0xBC   0x003C
+0xBD   0x003D
+0xBE   0x003E
+0xE2   0x005D
+0xE3   0x005B
+0xE4   0x007D
+0xE5   0x007B
+0xE8   0x002A
+0xEA   0x007C
+0xEB   0x005C
+0xFF   0x007F
index eaeeb41..687ca4e 100644 (file)
@@ -51,6 +51,8 @@ modules.so := $(addsuffix .so, $(modules))
 
 tests = bug-iconv1 bug-iconv2 tst-loading
 
+test-srcs := tst-table-from tst-table-to
+
 include ../Makeconfig
 
 libJIS-routines := jis0201 jis0208 jis0212
@@ -89,7 +91,10 @@ LDFLAGS-libISOIR165.so = -Wl,-soname,$(@F)
 distribute := gconv-modules extra-module.mk gap.awk gaptab.awk             \
              gen-8bit.sh gen-8bit-gap.sh gen-8bit-gap-1.sh                 \
              TESTS $(filter-out testdata/CVS%, $(wildcard testdata/*))     \
-             run-iconv-test.sh 8bit-generic.c 8bit-gap.c                   \
+             run-iconv-test.sh tst-tables.sh tst-table.sh                  \
+             tst-table-charmap.sh tst-table-from.c tst-table-to.c          \
+             EUC-JP.irreversible ISIRI-3342.irreversible SJIS.irreversible \
+             8bit-generic.c 8bit-gap.c                                     \
              ansi_x3.110.c asmo_449.c big5.c cp737.c cp737.h               \
              cp775.c cp775.h ibm874.c cns11643.c cns11643.h                \
              cns11643l1.c cns11643l1.h cp1250.c cp1251.c cp1252.c cp1253.c \
@@ -244,7 +249,8 @@ include ../Rules
 
 ifeq (no,$(cross-compiling))
 ifeq (yes,$(build-shared))
-tests: $(objpfx)iconv-test.out $(objpfx)mtrace-tst-loading
+tests: $(objpfx)iconv-test.out $(objpfx)tst-tables.out \
+       $(objpfx)mtrace-tst-loading
 endif
 endif
 
@@ -259,6 +265,17 @@ $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
                         $(common-objdir)/iconv/iconv_prog TESTS
        $(SHELL) -e $< $(common-objdir) > $@
 
+$(objpfx)tst-tables.out: tst-tables.sh $(objpfx)gconv-modules \
+                        $(addprefix $(objpfx),$(modules.so)) \
+                        $(objpfx)tst-table-from $(objpfx)tst-table-to
+       $(SHELL) $< $(common-objpfx) $(common-objpfx)iconvdata/ > $@
+
+do-tests-clean common-mostlyclean: tst-tables-clean
+
+.PHONY: tst-tables-clean
+tst-tables-clean:
+       -rm -f $(objpfx)tst-*.table $(objpfx)tst-EUC-TW.irreversible
+
 ifdef objpfx
 $(objpfx)gconv-modules: gconv-modules
        cp $^ $@
diff --git a/iconvdata/SJIS.irreversible b/iconvdata/SJIS.irreversible
new file mode 100644 (file)
index 0000000..dfc678a
--- /dev/null
@@ -0,0 +1,7 @@
+0x5C   0x005C
+0x7E   0x007E
+0x815F 0x005C
+0x815F 0xFF3C
+0x8191 0xFFE0
+0x8192 0xFFE1
+0x81CA 0xFFE2
index 9cf5a5f..8748036 100644 (file)
@@ -8644,8 +8644,21 @@ static const char from_ucs4_tab15[][2] =
                                                                              \
        inptr += 2;                                                           \
       }                                                                              \
-    else                                                                     \
+    else if (ch <= 0x80)                                                     \
       ++inptr;                                                               \
+    else                                                                     \
+      {                                                                              \
+       /* This is illegal.  */                                               \
+       if (! ignore_errors_p ())                                             \
+         {                                                                   \
+           result = __GCONV_ILLEGAL_INPUT;                                   \
+           break;                                                            \
+         }                                                                   \
+                                                                             \
+       ++inptr;                                                              \
+       ++*irreversible;                                                      \
+       continue;                                                             \
+      }                                                                              \
                                                                              \
     put32 (outptr, ch);                                                              \
     outptr += 4;                                                             \
index 27c484b..b57aa9d 100644 (file)
@@ -1,5 +1,5 @@
 /* Access functions for CNS 11643, plane 2 handling.
-   Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+   Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
 
@@ -50,19 +50,19 @@ cns11643_to_ucs4 (const char **s, size_t avail, unsigned char offset)
 
   idx = (ch2 - 0x21 - offset) * 94 + (ch3 - 0x21 - offset);
 
-  if ((ch - 0x21 - offset) == 1)
+  if ((ch - 0x20 - offset) == 1)
     {
-      if (idx > 0x2196)
+      if (idx > 0x21f2)
        return __UNKNOWN_10646_CHAR;
       result = __cns11643l1_to_ucs4_tab[idx];
     }
-  else if ((ch - 0x21 - offset) == 2)
+  else if ((ch - 0x20 - offset) == 2)
     {
       if (idx > 0x1de1)
        return __UNKNOWN_10646_CHAR;
       result = __cns11643l2_to_ucs4_tab[idx];
     }
-  else if ((ch - 0x21 - offset) == 0xe)
+  else if ((ch - 0x20 - offset) == 0xe)
     {
       if (idx > 0x19bd)
        return __UNKNOWN_10646_CHAR;
@@ -166,7 +166,7 @@ ucs4_to_cns11643 (uint32_t wch, char *s, size_t avail)
       break;
     case 0x3105 ... 0x3129:
       buf[0] = '\x25';
-      buf[1] = '\x26' + (ch - 0x3105);
+      buf[1] = '\x47' + (ch - 0x3105);
       break;
     case 0x32a3:
       cp = "\x22\x21";
@@ -177,12 +177,13 @@ ucs4_to_cns11643 (uint32_t wch, char *s, size_t avail)
     case 0x4e00 ... 0x9f9c:
       cp = __cns11643l1_from_ucs4_tab12[ch - 0x4e00];
 
-      if (cp[0] == '\0')
-       {
-         /* Let's try the other planes.  */
-         needed = 3;
-         cp = __cns11643_from_ucs4_tab[ch - 0x4e00];
-       }
+      if (cp[0] != '\0')
+       break;
+      /* FALLTHROUGH.  Let's try the other planes.  */
+    case 0x9f9d ... 0x9fa5:
+      /* Let's try the other planes.  */
+      needed = 3;
+      cp = __cns11643_from_ucs4_tab[ch - 0x4e00];
       break;
     case 0xfe30 ... 0xfe6b:
       cp = __cns11643l1_from_ucs4_tab13[ch - 0xfe30];
index d106b3d..2372d64 100644 (file)
@@ -1,5 +1,5 @@
 /* Mapping tables for CNS 11643, plane 1 handling.
-   Copyright (C) 1998 Free Software Foundation, Inc.
+   Copyright (C) 1998, 2000 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
 
@@ -1637,7 +1637,8 @@ const char __cns11643l1_from_ucs4_tab6[][2] =
   [0x000a] = "\x22\x49", [0x000b] = "\x22\x48", [0x0014] = "\x22\x45",
   [0x0015] = "\x22\x46", [0x0016] = "\x22\x4d", [0x0019] = "\x22\x4e",
   [0x001f] = "\x22\x50", [0x0020] = "\x22\x4f", [0x0027] = "\x22\x44",
-  [0x003d] = "\x22\x3d",
+  [0x003d] = "\x22\x3d", [0x004b] = "\x22\x3b", [0x004c] = "\x22\x3e",
+  [0x0051] = "\x22\x39", [0x0052] = "\x22\x3a",
 };
 
 /* Graphic pictures for control codes.  The table can be created using
index 1aa5918..6072c83 100644 (file)
@@ -1,5 +1,5 @@
 /* Access functions for CNS 11643, plane 1 handling.
-   Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+   Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
 
@@ -44,7 +44,7 @@ cns11643l1_to_ucs4 (const unsigned char **s, size_t avail,
     return __UNKNOWN_10646_CHAR;
 
   idx = (ch - 0x21 - offset) * 94 + (ch2 - 0x21 - offset);
-  if (idx > 0x2196)
+  if (idx > 0x21f2)
     return __UNKNOWN_10646_CHAR;
 
   (*s) += 2;
@@ -137,7 +137,7 @@ ucs4_to_cns11643l1 (uint32_t wch, unsigned char *s, size_t avail)
       break;
     case 0x3105 ... 0x3129:
       buf[0] = '\x25';
-      buf[1] = '\x26' + (ch - 0x3105);
+      buf[1] = '\x47' + (ch - 0x3105);
       break;
     case 0x32a3:
       cp = "\x22\x21";
index ed7a197..39c2f1f 100644 (file)
@@ -65,7 +65,7 @@
           character is also available.  */                                   \
        uint32_t ch2;                                                         \
                                                                              \
-       if (inptr + (ch == 0x8e ? 3 : 1) >= inend)                            \
+       if (inptr + 1 >= inend)                                               \
          {                                                                   \
            /* The second character is not available.  Store the              \
               intermediate result.  */                                       \
@@ -73,7 +73,7 @@
            break;                                                            \
          }                                                                   \
                                                                              \
-       ch2 = *inptr;                                                         \
+       ch2 = *(inptr + 1);                                                   \
                                                                              \
        /* All second bytes of a multibyte character must be >= 0xa1. */      \
        if (ch2 < 0xa1 || ch2 == 0xff)                                        \
            const char *endp = inptr + 1;                                     \
                                                                              \
            ch = cns11643_to_ucs4 (&endp, inend - inptr - 1, 0x80);           \
-           /* Please note that we need not test for the missing input        \
-              characters here anymore.  */                                   \
+                                                                             \
+           if (ch == 0)                                                      \
+             {                                                               \
+               /* The third or fourth character is not available.  Store     \
+                  the intermediate result.  */                               \
+               result = __GCONV_INCOMPLETE_INPUT;                            \
+               break;                                                        \
+             }                                                               \
+                                                                             \
            if (ch == __UNKNOWN_10646_CHAR)                                   \
              {                                                               \
                /* Illegal input.  */                                         \
index dda8acd..01c0c58 100644 (file)
@@ -46,7 +46,7 @@ static const uint32_t to_ucs4[256] =
   /* 0x98 */ 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   /* 0xa0 */ 0x0000, 0x00a1, 0x00a2, 0x00a3, 0x0024, 0x00a5, 0x0000, 0x00a7,
   /* 0xa8 */ 0x0000, 0x2018, 0x201c, 0x00ab, 0x2190, 0x2191, 0x2192, 0x2193,
-  /* 0xb0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
+  /* 0xb0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00d7, 0x00b5, 0x00b6, 0x00b7,
   /* 0xb8 */ 0x00f7, 0x2019, 0x201d, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
   /* 0xc0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   /* 0xc8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
index 06558f6..bd92e12 100644 (file)
@@ -67,7 +67,7 @@ const uint16_t __jis0208_to_ucs[0x1e80] =
   [0x0010] = 0xffe3, [0x0011] = 0xff3f, [0x0012] = 0x30fd, [0x0013] = 0x30fe,
   [0x0014] = 0x309d, [0x0015] = 0x309e, [0x0016] = 0x3003, [0x0017] = 0x4edd,
   [0x0018] = 0x3005, [0x0019] = 0x3006, [0x001a] = 0x3007, [0x001b] = 0x30fc,
-  [0x001c] = 0x2015, [0x001d] = 0x2010, [0x001e] = 0xff0f, [0x001f] = 0xff3c,
+  [0x001c] = 0x2015, [0x001d] = 0x2010, [0x001e] = 0xff0f, [0x001f] = 0x005c,
   [0x0020] = 0x301c, [0x0021] = 0x2016, [0x0022] = 0xff5c, [0x0023] = 0x2026,
   [0x0024] = 0x2025, [0x0025] = 0x2018, [0x0026] = 0x2019, [0x0027] = 0x201c,
   [0x0028] = 0x201d, [0x0029] = 0xff08, [0x002a] = 0xff09, [0x002b] = 0x3014,
index 36857b6..b8baaa3 100644 (file)
@@ -111,7 +111,7 @@ const struct jisx0212_idx __jisx0212_to_ucs_idx[] =
 const uint16_t __jisx0212_to_ucs[] =
 {
   0x02d8, 0x02c7, 0x00b8, 0x02d9, 0x02dd, 0x00af, 0x02db, 0x02da,
-  0xff5e, 0x0384, 0x0385, 0x00a1, 0x00a6, 0x00bf, 0x00ba, 0x00aa,
+  0x007e, 0x0384, 0x0385, 0x00a1, 0x00a6, 0x00bf, 0x00ba, 0x00aa,
   0x00a9, 0x00ae, 0x2122, 0x00a4, 0x2116, 0x0386, 0x0388, 0x0389,
   0x038a, 0x03aa, 000000, 0x038c, 000000, 0x038e, 0x03ab, 000000,
   0x038f, 000000, 000000, 000000, 000000, 0x03ac, 0x03ad, 0x03ae,
index 7bbfef3..33a2056 100644 (file)
@@ -67,7 +67,7 @@ static const uint16_t cjk_block1[703] =
   [ 16] = 0xFFE3, [ 17] = 0xFF3F, [ 18] = 0x30FD, [ 19] = 0x30FE,
   [ 20] = 0x309D, [ 21] = 0x309E, [ 22] = 0x3003, [ 23] = 0x4EDD,
   [ 24] = 0x3005, [ 25] = 0x3006, [ 26] = 0x3007, [ 27] = 0x30FC,
-  [ 28] = 0x2015, [ 29] = 0x2010, [ 30] = 0xFF0F, [ 31] = 0xFF3C,
+  [ 28] = 0x2015, [ 29] = 0x2010, [ 30] = 0xFF0F, [ 31] = 0x005C,
   [ 32] = 0x301C, [ 33] = 0x2016, [ 34] = 0xFF5C, [ 35] = 0x2026,
   [ 36] = 0x2025, [ 37] = 0x2018, [ 38] = 0x2019, [ 39] = 0x201C,
   [ 40] = 0x201D, [ 41] = 0xFF08, [ 42] = 0xFF09, [ 43] = 0x3014,
diff --git a/iconvdata/tst-table-charmap.sh b/iconvdata/tst-table-charmap.sh
new file mode 100755 (executable)
index 0000000..99c713c
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/sh
+# Copyright (C) 2000 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+# Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with the GNU C Library; see the file COPYING.LIB.  If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Converts a glibc format charmap to a simple format .table file.
+#
+# The charmap is read from standard input and the table is written to
+# standard output.  Each output line holds the byte sequence as a
+# 0x-prefixed uppercase hex number followed by the 0x-prefixed four digit
+# Unicode value.  The lines are sorted and duplicates are removed.
+# $1 selects the input syntax: the literal word POSIX selects the old
+# format, anything else the new Unicode based format.
+
+LC_ALL=C
+export LC_ALL
+
+# Both branches end with a "grep -v" that drops lines which map the single
+# byte 0x00 to a nonzero Unicode value.  Lines containing the text
+# 'not a real character' are dropped as well.
+case "$1" in
+  POSIX )
+    # Old POSIX/DKUUG borrowed format
+    grep '^<.*>.*/x[0-9A-Fa-f]*[       ]*<U....>.*$' | grep -v 'not a real character' | sed -e 's,^<.*>[       ]*\([/x0-9A-Fa-f]*\)[   ]*<U\(....\)>.*$,\1     0x\2,' | tr abcdef ABCDEF | sed -e 's,/x\([0-9A-F][0-9A-F]\),\1,g' | sed -e 's,^,0x,' | sort | uniq | grep -v '^0x00    0x\([1-9A-F]...\|.[1-9A-F]..\|..[1-9A-F].\|...[1-9A-F]\)'
+    ;;
+  *)
+    # New Unicode based format
+    # A leading %IRREVERSIBLE% marker is stripped first, so those mappings
+    # are included in the table like ordinary ones.
+    sed -e 's,^%IRREVERSIBLE%,,' | grep '^<U....>[     ]*/x' | grep -v 'not a real character' | sed -e 's,<U\(....\)>[         ]*\([/x0-9A-Fa-f]*\).*$,\2      0x\1,' | tr abcdef ABCDEF | sed -e 's,/x\([0-9A-F][0-9A-F]\),\1,g' | sed -e 's,^,0x,' | sort | uniq | grep -v '^0x00    0x\([1-9A-F]...\|.[1-9A-F]..\|..[1-9A-F].\|...[1-9A-F]\)'
+    ;;
+esac
diff --git a/iconvdata/tst-table-from.c b/iconvdata/tst-table-from.c
new file mode 100644 (file)
index 0000000..bd2647e
--- /dev/null
@@ -0,0 +1,225 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* Create a table from CHARSET to Unicode.
+   This is a good test for CHARSET's iconv() module, in particular the
+   FROM_LOOP BODY macro.  */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <iconv.h>
+#include <errno.h>
+
+/* Converts a byte buffer to a hexadecimal string.
+   BUF holds the bytes and BUFLEN says how many of them to print; only
+   lengths 1 to 4 are supported and any other length aborts the program.
+   The result is "0x" followed by two uppercase hex digits per byte.  It
+   lives in a static buffer, so each call overwrites the previous
+   result.  */
+static const char*
+hexbuf (unsigned char buf[], unsigned int buflen)
+{
+  static char msg[50];
+
+  switch (buflen)
+    {
+    case 1:
+      sprintf (msg, "0x%02X", buf[0]);
+      break;
+    case 2:
+      sprintf (msg, "0x%02X%02X", buf[0], buf[1]);
+      break;
+    case 3:
+      sprintf (msg, "0x%02X%02X%02X", buf[0], buf[1], buf[2]);
+      break;
+    case 4:
+      sprintf (msg, "0x%02X%02X%02X%02X", buf[0], buf[1], buf[2], buf[3]);
+      break;
+    default:
+      abort ();
+    }
+  return msg;
+}
+
+/* Attempts to convert a byte buffer BUF (BUFLEN bytes) to OUT (6 bytes)
+   using the conversion descriptor CD.  Returns the number of bytes
+   written to OUT on success, 0 if iconv fails with EINVAL (BUF is only
+   the beginning of a longer sequence), or -1 if iconv fails with EILSEQ
+   (BUF is invalid).  Any other iconv error, and a successful call that
+   leaves part of BUF unconsumed, prints a diagnostic to stderr and exits
+   with status 1.  */
+static int
+try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out)
+{
+  const char *inbuf = (const char *) buf;
+  size_t inbytesleft = buflen;
+  char *outbuf = (char *) out;
+  size_t outbytesleft = 6;
+  /* iconv wants the address of the input pointer, i.e. a pointer to
+     pointer; the cast must therefore be to 'char **'.  It only drops
+     the const qualifier of INBUF.  */
+  size_t result = iconv (cd,
+                        (char **) &inbuf, &inbytesleft,
+                        &outbuf, &outbytesleft);
+  if (result == (size_t)(-1))
+    {
+      if (errno == EILSEQ)
+       {
+         /* Invalid byte sequence.  */
+         return -1;
+       }
+      else if (errno == EINVAL)
+       {
+         /* Incomplete byte sequence; the caller may append a byte.  */
+         return 0;
+       }
+      else
+       {
+         /* fprintf may clobber errno, so keep it for perror.  */
+         int saved_errno = errno;
+         fprintf (stderr, "%s: iconv error: ", hexbuf (buf, buflen));
+         errno = saved_errno;
+         perror ("");
+         exit (1);
+       }
+    }
+  else
+    {
+      if (inbytesleft != 0)
+       {
+         /* Success is only accepted when the whole input was used.  */
+         fprintf (stderr, "%s: inbytes = %ld, outbytes = %ld\n",
+                  hexbuf (buf, buflen),
+                  (long) (buflen - inbytesleft),
+                  (long) (6 - outbytesleft));
+         exit (1);
+       }
+      return 6 - outbytesleft;
+    }
+}
+
+/* Returns the out[] buffer as a Unicode value.
+   OUT holds OUTLEN bytes that are taken as one UTF-8 sequence of exactly
+   that length: the payload bits of the first byte and the low six bits
+   of every following byte are put together.  The bytes are not checked
+   for validity.  An OUTLEN outside 1..6 yields 0xfffd.  */
+static unsigned int
+utf8_decode (const unsigned char *out, unsigned int outlen)
+{
+  return (outlen==1 ? out[0] :
+         outlen==2 ? ((out[0] & 0x1f) << 6) + (out[1] & 0x3f) :
+         outlen==3 ? ((out[0] & 0x0f) << 12) + ((out[1] & 0x3f) << 6) + (out[2] & 0x3f) :
+         outlen==4 ? ((out[0] & 0x07) << 18) + ((out[1] & 0x3f) << 12) + ((out[2] & 0x3f) << 6) + (out[3] & 0x3f) :
+         outlen==5 ? ((out[0] & 0x03) << 24) + ((out[1] & 0x3f) << 18) + ((out[2] & 0x3f) << 12) + ((out[3] & 0x3f) << 6) + (out[4] & 0x3f) :
+         outlen==6 ? ((out[0] & 0x01) << 30) + ((out[1] & 0x3f) << 24) + ((out[2] & 0x3f) << 18) + ((out[3] & 0x3f) << 12) + ((out[4] & 0x3f) << 6) + (out[5] & 0x3f) :
+         0xfffd);
+}
+
+/* Prints the table from the charset named by argv[1] to Unicode on
+   stdout.  Every single byte is tried first; a sequence is extended by
+   one more byte only when the conversion reports it as incomplete.
+   Each sequence that converts is printed as the 0x-prefixed input bytes,
+   a tab, and the 0x-prefixed Unicode value.  Invalid sequences are
+   skipped silently.  Exits with status 0 on success and with status 1
+   after a usage, iconv or I/O error.  */
+int
+main (int argc, char *argv[])
+{
+  const char *charset;
+  iconv_t cd;
+
+  if (argc != 2)
+    {
+      /* Name this program, not its sibling tst-table-to.  */
+      fprintf (stderr, "Usage: tst-table-from charset\n");
+      exit (1);
+    }
+  charset = argv[1];
+
+  cd = iconv_open ("UTF-8", charset);
+  if (cd == (iconv_t)(-1))
+    {
+      perror ("iconv_open");
+      exit (1);
+    }
+
+  {
+    unsigned char out[6];
+    unsigned char buf[4];
+    unsigned int i0, i1, i2, i3;
+    int result;
+
+    /* At every level: result < 0 means invalid (nothing is printed),
+       result > 0 means converted (one table line is printed), and
+       result == 0 means incomplete (descend one level).  */
+    for (i0 = 0; i0 < 0x100; i0++)
+      {
+       buf[0] = i0;
+       result = try (cd, buf, 1, out);
+       if (result < 0)
+         {
+         }
+       else if (result > 0)
+         {
+           printf ("0x%02X\t0x%04X\n",
+                   i0, utf8_decode (out, result));
+         }
+       else
+         {
+           for (i1 = 0; i1 < 0x100; i1++)
+             {
+               buf[1] = i1;
+               result = try (cd, buf, 2, out);
+               if (result < 0)
+                 {
+                 }
+               else if (result > 0)
+                 {
+                   printf ("0x%02X%02X\t0x%04X\n",
+                           i0, i1, utf8_decode (out, result));
+                 }
+               else
+                 {
+                   for (i2 = 0; i2 < 0x100; i2++)
+                     {
+                       buf[2] = i2;
+                       result = try (cd, buf, 3, out);
+                       if (result < 0)
+                         {
+                         }
+                       else if (result > 0)
+                         {
+                           printf ("0x%02X%02X%02X\t0x%04X\n",
+                                   i0, i1, i2, utf8_decode (out, result));
+                         }
+                       /* Four byte sequences are not enumerated when the
+                          charset is spelled exactly "UTF-8".  */
+                       else if (strcmp (charset, "UTF-8"))
+                         {
+                           for (i3 = 0; i3 < 0x100; i3++)
+                             {
+                               buf[3] = i3;
+                               result = try (cd, buf, 4, out);
+                               if (result < 0)
+                                 {
+                                 }
+                               else if (result > 0)
+                                 {
+                                   printf ("0x%02X%02X%02X%02X\t0x%04X\n",
+                                           i0, i1, i2, i3,
+                                           utf8_decode (out, result));
+                                 }
+                               else
+                                 {
+                                   /* BUF has no room for a fifth byte, so
+                                      a still incomplete sequence is an
+                                      error.  */
+                                   fprintf (stderr,
+                                            "%s: incomplete byte sequence\n",
+                                            hexbuf (buf, 4));
+                                   exit (1);
+                                 }
+                             }
+                         }
+                     }
+                 }
+             }
+         }
+      }
+  }
+
+  if (iconv_close (cd) < 0)
+    {
+      perror ("iconv_close");
+      exit (1);
+    }
+
+  if (ferror (stdin) || ferror (stdout))
+    {
+      fprintf (stderr, "I/O error\n");
+      exit (1);
+    }
+
+  exit (0);
+}
diff --git a/iconvdata/tst-table-to.c b/iconvdata/tst-table-to.c
new file mode 100644 (file)
index 0000000..b725f1f
--- /dev/null
@@ -0,0 +1,107 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* Create a table from Unicode to CHARSET.
+   This is a good test for CHARSET's iconv() module, in particular the
+   TO_LOOP BODY macro.  */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <iconv.h>
+#include <errno.h>
+
+/* Prints the table from Unicode to the charset named by argv[1] on
+   stdout.  Every value from 0 to 0xFFFF is stored in an unsigned short
+   and handed to a "UCS-2" to CHARSET conversion.  For each value that
+   converts with a return value of 0, one line is printed: the
+   0x-prefixed output bytes, a tab, and the 0x-prefixed input value.
+   Values rejected with EILSEQ are skipped silently.  Exits with status
+   0 on success and with status 1 after a usage, iconv or I/O error.  */
+int
+main (int argc, char *argv[])
+{
+  const char *charset;
+  iconv_t cd;
+
+  if (argc != 2)
+    {
+      fprintf (stderr, "Usage: tst-table-to charset\n");
+      exit (1);
+    }
+  charset = argv[1];
+
+  cd = iconv_open (charset, "UCS-2");
+  if (cd == (iconv_t)(-1))
+    {
+      perror ("iconv_open");
+      exit (1);
+    }
+
+  {
+    unsigned int i;
+    unsigned char buf[10];
+
+    for (i = 0; i < 0x10000; i++)
+      {
+       unsigned short in = i;
+       const char *inbuf = (const char *) &in;
+       size_t inbytesleft = sizeof (unsigned short);
+       char *outbuf = (char *) buf;
+       size_t outbytesleft = sizeof (buf);
+       /* iconv wants the address of the input pointer, i.e. a pointer
+          to pointer; the cast must therefore be to 'char **'.  It only
+          drops the const qualifier of INBUF.  */
+       size_t result = iconv (cd,
+                              (char **) &inbuf, &inbytesleft,
+                              &outbuf, &outbytesleft);
+       if (result == (size_t)(-1))
+         {
+           /* EILSEQ just means "no mapping"; anything else is fatal.  */
+           if (errno != EILSEQ)
+             {
+               /* fprintf may clobber errno, so keep it for perror.  */
+               int saved_errno = errno;
+               fprintf (stderr, "0x%02X: iconv error: ", i);
+               errno = saved_errno;
+               perror ("");
+               exit (1);
+             }
+         }
+       else if (result == 0) /* ignore conversions with transliteration */
+         {
+           unsigned int j, jmax;
+           /* A clean conversion must use all input and write output.  */
+           if (inbytesleft != 0 || outbytesleft == sizeof (buf))
+             {
+               fprintf (stderr, "0x%02X: inbytes = %ld, outbytes = %ld\n", i,
+                        (long) (sizeof (unsigned short) - inbytesleft),
+                        (long) (sizeof (buf) - outbytesleft));
+               exit (1);
+             }
+           jmax = sizeof (buf) - outbytesleft;
+           printf ("0x");
+           for (j = 0; j < jmax; j++)
+             printf ("%02X", buf[j]);
+           printf ("\t0x%04X\n", i);
+         }
+      }
+  }
+
+  if (iconv_close (cd) < 0)
+    {
+      perror ("iconv_close");
+      exit (1);
+    }
+
+  if (ferror (stdin) || ferror (stdout))
+    {
+      fprintf (stderr, "I/O error\n");
+      exit (1);
+    }
+
+  exit (0);
+}
diff --git a/iconvdata/tst-table.sh b/iconvdata/tst-table.sh
new file mode 100755 (executable)
index 0000000..4cd2f6e
--- /dev/null
@@ -0,0 +1,75 @@
+#!/bin/sh
+# Copyright (C) 2000 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+# Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with the GNU C Library; see the file COPYING.LIB.  If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Checks that the iconv() implementation (in both directions) for a
+# stateless encoding agrees with the charmap table.
+#
+# Arguments:
+#   $1  common_objpfx: prefix used to find elf/ld.so, the library path
+#       and the iconvdata directory that GCONV_PATH is set to
+#   $2  objpfx: prefix of the two test programs and of all generated files
+#   $3  charset: name handed to tst-table-from and tst-table-to
+#   $4  charmap: name of the file below ../localedata/charmaps; when it
+#       is empty or missing the charset name is used instead
+
+common_objpfx=$1
+objpfx=$2
+charset=$3
+charmap=$4
+
+GCONV_PATH=${common_objpfx}iconvdata
+export GCONV_PATH
+LC_ALL=C
+export LC_ALL
+
+# From here on a failing command ends the script with a nonzero status.
+# This is how a mismatch found by one of the cmp calls below is reported.
+set -e
+
+# Get the charmap.
+./tst-table-charmap.sh ${charmap:-$charset} \
+  < ../localedata/charmaps/${charmap:-$charset} \
+  > ${objpfx}tst-${charset}.charmap.table
+
+# Precompute expected differences between the two iconv directions.
+# For EUC-TW the list is generated from the charmap table (all lines that
+# start with 0x8EA1); for every other charset a file named
+# <charset>.irreversible in the current directory is used if it exists.
+if test ${charset} = EUC-TW; then
+  irreversible=${objpfx}tst-${charset}.irreversible
+  grep '^0x8EA1' ${objpfx}tst-${charset}.charmap.table > ${irreversible}
+else
+  irreversible=${charset}.irreversible
+fi
+
+# iconv in one direction.
+${common_objpfx}elf/ld.so --library-path $common_objpfx \
+${objpfx}tst-table-from ${charset} \
+  > ${objpfx}tst-${charset}.table
+
+# iconv in the other direction.
+${common_objpfx}elf/ld.so --library-path $common_objpfx \
+${objpfx}tst-table-to ${charset} | sort \
+  > ${objpfx}tst-${charset}.inverse.table
+
+# Difference between the two iconv directions.
+# NOTE: the resulting .irreversible.table file is not read again by this
+# script.
+diff ${objpfx}tst-${charset}.table ${objpfx}tst-${charset}.inverse.table | \
+  grep '^[<>]' | sed -e 's,^. ,,' > ${objpfx}tst-${charset}.irreversible.table
+
+# Check 1: charmap and iconv forward should be identical.
+cmp -s ${objpfx}tst-${charset}.charmap.table ${objpfx}tst-${charset}.table
+
+# Check 2: the difference between the two iconv directions.
+# With a list of irreversible mappings, those lines are removed from the
+# charmap table ("uniq -u" keeps only lines that occur once) and the rest
+# must equal the inverse table.  Without such a list both directions must
+# produce the same table.
+if test -f ${irreversible}; then
+  cat ${objpfx}tst-${charset}.charmap.table ${irreversible} | sort | uniq -u \
+    > ${objpfx}tst-${charset}.tmp.table
+  cmp -s ${objpfx}tst-${charset}.tmp.table ${objpfx}tst-${charset}.inverse.table
+else
+  cmp -s ${objpfx}tst-${charset}.table ${objpfx}tst-${charset}.inverse.table
+fi
+
+exit 0
diff --git a/iconvdata/tst-tables.sh b/iconvdata/tst-tables.sh
new file mode 100755 (executable)
index 0000000..8d2735a
--- /dev/null
@@ -0,0 +1,213 @@
+#!/bin/sh
+# Copyright (C) 2000 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+# Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with the GNU C Library; see the file COPYING.LIB.  If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Checks that the iconv() implementation (in both directions) for the
+# stateless encodings agrees with the corresponding charmap table.
+
+common_objpfx=$1
+objpfx=$2
+
+status=0
+
+cat <<EOF |
+  # Single-byte and other "small" encodings come here.
+  # Keep this list in the same order as gconv-modules.
+  #
+  # charset name    table name          comment
+  ASCII             ANSI_X3.4-1968
+  ISO646-GB         BS_4730
+  ISO646-CA         CSA_Z243.4-1985-1
+  ISO646-CA2        CSA_Z243.4-1985-2
+  ISO646-DE         DIN_66003
+  ISO646-DK         DS_2089
+  ISO646-ES         ES
+  ISO646-ES2        ES2
+  ISO646-CN         GB_1988-80
+  ISO646-IT         IT
+  ISO646-JP         JIS_C6220-1969-RO
+  ISO646-JP-OCR-B   JIS_C6229-1984-B
+  ISO646-YU         JUS_I.B1.002
+  ISO646-KR         KSC5636
+  ISO646-HU         MSZ_7795.3
+  ISO646-CU         NC_NC00-10
+  ISO646-FR         NF_Z_62-010
+  ISO646-FR1        NF_Z_62-010_1973
+  ISO646-NO         NS_4551-1
+  ISO646-NO2        NS_4551-2
+  ISO646-PT         PT
+  ISO646-PT2        PT2
+  ISO646-SE         SEN_850200_B
+  ISO646-SE2        SEN_850200_C
+  ISO-8859-1
+  ISO-8859-2
+  ISO-8859-3
+  ISO-8859-4
+  ISO-8859-5
+  ISO-8859-6
+  ISO-8859-7
+  ISO-8859-8
+  ISO-8859-9
+  ISO-8859-10
+  #ISO-8859-11                          No corresponding table, nonstandard
+  ISO-8859-13
+  ISO-8859-14
+  ISO-8859-15
+  ISO-8859-16
+  T.61-8BIT
+  ISO_6937
+  #ISO_6937-2        ISO-IR-90          Handling of combining marks is broken
+  KOI-8
+  KOI8-R
+  LATIN-GREEK
+  LATIN-GREEK-1
+  HP-ROMAN8
+  EBCDIC-AT-DE
+  EBCDIC-AT-DE-A
+  EBCDIC-CA-FR
+  EBCDIC-DK-NO
+  EBCDIC-DK-NO-A
+  EBCDIC-ES
+  EBCDIC-ES-A
+  EBCDIC-ES-S
+  EBCDIC-FI-SE
+  EBCDIC-FI-SE-A
+  EBCDIC-FR
+  EBCDIC-IS-FRISS
+  EBCDIC-IT
+  EBCDIC-PT
+  EBCDIC-UK
+  EBCDIC-US
+  IBM037
+  IBM038
+  IBM256
+  IBM273
+  IBM274
+  IBM275
+  IBM277
+  IBM278
+  IBM280
+  IBM281
+  IBM284
+  IBM285
+  IBM290
+  IBM297
+  IBM420
+  IBM423
+  IBM424
+  IBM437
+  IBM500
+  IBM850
+  IBM851
+  IBM852
+  IBM855
+  IBM857
+  IBM860
+  IBM861
+  IBM862
+  IBM863
+  IBM864
+  IBM865
+  IBM866
+  IBM868
+  IBM869
+  IBM870
+  IBM871
+  IBM875
+  IBM880
+  IBM891
+  IBM903
+  IBM904
+  IBM905
+  IBM918
+  IBM1004
+  IBM1026
+  IBM1047
+  CP1250
+  CP1251
+  CP1252
+  CP1253
+  CP1254
+  CP1255
+  CP1256
+  CP1257
+  CP1258
+  IBM874
+  CP737
+  CP775
+  MACINTOSH
+  IEC_P27-1
+  ASMO_449
+  ISO-IR-99         ANSI_X3.110-1983
+  ISO-IR-139        CSN_369103
+  CWI
+  DEC-MCS
+  ECMA-CYRILLIC
+  ISO-IR-153        GOST_19768-74
+  GREEK-CCITT
+  GREEK7
+  GREEK7-OLD
+  INIS
+  INIS-8
+  INIS-CYRILLIC
+  ISO_2033          ISO_2033-1983
+  ISO_5427
+  ISO_5427-EXT
+  #ISO_5428                             Handling of combining marks is broken
+  ISO_10367-BOX
+  MAC-IS
+  MAC-UK
+  NATS-DANO
+  NATS-SEFI
+  WIN-SAMI-2        SAMI-WS2
+  ISO-IR-197
+  TIS-620
+  KOI8-U
+  ISIRI-3342
+  #
+  # Multibyte encodings come here
+  #
+  SJIS
+  #EUC-KR                               Charmap contains extraneous entries
+  CP949
+  #JOHAB                                No charmap exists
+  BIG5
+  #BIG5HKSCS                            Broken, please fix it
+  EUC-JP
+  EUC-CN            GB2312
+  #GBK                                  Converter uses private area characters
+  EUC-TW
+  #GB18030                              Broken, please fix it
+  #
+  # Stateful encodings not testable this way
+  #
+  #ISO-2022-JP
+  #ISO-2022-JP-2
+  #ISO-2022-KR
+  #ISO-2022-CN
+  #
+EOF
+while read charset charmap; do
+  case ${charset} in \#*) continue;; esac
+  echo "Testing ${charset}" 1>&2
+  ./tst-table.sh ${common_objpfx} ${objpfx} ${charset} ${charmap} \
+  || { echo "failed: ./tst-table.sh ${common_objpfx} ${objpfx} ${charset} ${charmap}"; status=1; }
+done
+
+exit $status
index 90ab019..ac3776a 100644 (file)
@@ -147,7 +147,7 @@ int __pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize)
   size_t ps = __getpagesize ();
 
   /* First round up the guard size.  */
-  guardsize = roundup (guardsize, ps);
+  guardsize = page_roundup (guardsize, ps);
 
   /* The guard size must not be larger than the stack itself */
   if (guardsize >= attr->__stacksize) return EINVAL;
index 23bba4d..f4a38c9 100644 (file)
@@ -1,3 +1,20 @@
+2000-09-03  Bruno Haible  <haible@clisp.cons.org>
+
+       * charmaps/EUC-TW: Add commented non-reversible mappings.
+
+2000-09-03  Bruno Haible  <haible@clisp.cons.org>
+
+       * charmaps/CP949: New file.
+
+2000-09-03  Bruno Haible  <haible@clisp.cons.org>
+
+       * charmaps/GB2312: Remove 0x80..0xA0, 0xAA..0xAF, 0xF8..FF.
+
+2000-09-03  Bruno Haible  <haible@clisp.cons.org>
+
+       * charmaps/EUC-JP: Nonreversibly map 0xA1C0 to U+005C and 0x8FA2B7 to
+       U+007E.
+
 2000-09-01  Ulrich Drepper  <drepper@redhat.com>
 
        * locales/zh_HK: Use zh_TW data for LC_MESSAGES.
index 5f04a9d..fcf62b8 100644 (file)
@@ -3,6 +3,6 @@ CPPFLAGS-start.S = -D__ASSEMBLY__
 endif
 
 ifeq ($(subdir),elf)
-dl-routines += dl-symaddr dl-fptr
-rtld-routines += dl-symaddr dl-fptr
+sysdep-dl-routines += dl-symaddr dl-fptr
+sysdep_routines += $(sysdep-dl-routines)
 endif