Update.
authorUlrich Drepper <drepper@redhat.com>
Sun, 26 Apr 1998 14:03:20 +0000 (14:03 +0000)
committerUlrich Drepper <drepper@redhat.com>
Sun, 26 Apr 1998 14:03:20 +0000 (14:03 +0000)
* iconvdata/Makefile: Add rules for ISO-2022-KR.
* iconvdata/iso-2022-kr.c: New file.

* iconvdata/gconv-modules: Add entry for ISO-2022-JP, ISO-2022-JP-2,
and ISO-2022-KR.

ChangeLog
iconvdata/Makefile
iconvdata/gconv-modules
iconvdata/iso-2022-jp.c
iconvdata/iso-2022-kr.c [new file with mode: 0644]

index 9f323b741ecfc541f4c69d486ec12ce581b25425..8d1fead57b3c37f948d7bcc0fb13eb55fd56f88c 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,10 @@
 1998-04-26  Ulrich Drepper  <drepper@cygnus.com>
 
-       * iconvdata/gconv-modules: Add entry for ISO-2022-JP and
-       ISO-2022-JP-2.
+       * iconvdata/Makefile: Add rules for ISO-2022-KR.
+       * iconvdata/iso-2022-kr.c: New file.
+
+       * iconvdata/gconv-modules: Add entry for ISO-2022-JP, ISO-2022-JP-2,
+       and ISO-2022-KR.
 
 1998-04-25 18:39  Ulrich Drepper  <drepper@cygnus.com>
 
index d302250a8b6b708ded6c348ff9f36f59834935a4..276ae886ed9c5d06804b3c1921ec7a0a9439a851 100644 (file)
@@ -31,7 +31,7 @@ modules       := ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5             \
           EBCDIC-FI-SE-A EBCDIC-FR EBCDIC-IS-FRISS EBCDIC-IT EBCDIC-PT  \
           EBCDIC-UK EBCDIC-US IBM037 IBM038 IBM274 IBM275 IBM423 IBM424 \
           IBM500 IBM870 IBM871 IBM891 IBM903 IBM904 IBM905 IBM1047      \
-          CP874 CP737 CP775
+          CP874 CP737 CP775 ISO-2022-KR
 ifneq ($(PERL),no)
 modules += KOI8-R LATIN-GREEK LATIN-GREEK-1 IBM256 IBM273 IBM277 IBM278         \
           IBM280 IBM281 IBM284 IBM285 IBM290 IBM297 IBM420 IBM437       \
@@ -143,6 +143,7 @@ EUC-JP-routines := eucjp
 EUC-CN-routines := euccn
 EUC-TW-routines := euctw
 ISO-2022-JP-routines := iso-2022-jp
+ISO-2022-KR-routines := iso-2022-kr
 libJIS-routines := jis0201 jis0208 jis0212
 libKSC-routines := ksc5601
 libGB-routines := gb2312
@@ -163,7 +164,9 @@ $(objpfx)EUC-TW.so: $(objpfx)libCNS.so
 
 LDFLAGS-ISO-2022-JP.so = -Wl,-rpath,$(gconvdir)
 $(objpfx)ISO-2022-JP.so: $(objpfx)libJIS.so $(objpfx)libGB.so \
-                        $(objpfx)libCNS.so
+                        $(objpfx)libCNS.so $(objpfx)libKSC.so
+LDFLAGS-ISO-2022-KR.so = -Wl,-rpath,$(gconvdir)
+$(objpfx)ISO-2022-KR.so: $(objpfx)libKSC.so
 
 LDFLAGS-libJIS.so = -Wl,-soname,$(@F)
 LDFLAGS-libKSC.so = -Wl,-soname,$(@F)
@@ -193,7 +196,7 @@ distribute := 8bit-generic.c 8bit-gap.c gap.pl gaptab.pl gconv-modules          \
              ibm903.c ibm904.c ibm905.c ibm918.c ibm1004.c ibm1026.c       \
              ibm1047.c cp1250.c cp1251.c cp1252.c cp1253.c cp1254.c        \
              cp1255.c cp1256.c cp1257.c cp874.c cp874.h cp737.c cp737.h    \
-             cp775.c cp775.h iso-2022-jp.c
+             cp775.c cp775.h iso-2022-jp.c iso-2022-kr.c
 
 # We build the transformation modules only when we build shared libs.
 ifeq (yes,$(build-shared))
index 169fbb2b5227eb7e6c1d34312414db0fd22f3853..fe47da1e9c1d185e69c07a2c6b5e56a661389d80 100644 (file)
@@ -757,3 +757,7 @@ module      INTERNAL                ISO-2022-JP//           ISO-2022-JP     1
 
 module ISO-2022-JP-2//         INTERNAL                ISO-2022-JP     1
 module INTERNAL                ISO-2022-JP-2//         ISO-2022-JP     1
+
+#      from                      to                    module          cost
+module ISO-2022-KR//           INTERNAL                ISO-2022-KR     1
+module INTERNAL                ISO-2022-KR//           ISO-2022-KR     1
index afd6579d1601ef7cacf09167852cd70b584abe4d..9ca648c3c29713a003b27535e507b8c6a1a82d67 100644 (file)
@@ -54,6 +54,7 @@ struct gap
 #define PREPARE_LOOP \
   enum direction dir = ((struct iso2022jp_data *) step->data)->dir;          \
   enum variant var = ((struct iso2022jp_data *) step->data)->var;            \
+  int save_state;                                                            \
   int set = data->statep->count;
 #define END_LOOP \
   data->statep->count = set;
@@ -81,7 +82,6 @@ struct iso2022jp_data
 {
   enum direction dir;
   enum variant var;
-  mbstate_t save_state;
 };
 
 
@@ -211,11 +211,9 @@ gconv_end (struct gconv_step *data)
    and retore the state.  */
 #define SAVE_RESET_STATE(Save) \
   if (Save)                                                                  \
-    ((struct iso2022jp_data *) step->data)->save_state.count                 \
-      = data->statep->count;                                                 \
+    save_set = set;                                                          \
   else                                                                       \
-    data->statep->count                                                              \
-      = ((struct iso2022jp_data *) step->data)->save_state.count
+    set = save_set
 
 
 /* First define the conversion function from ISO-2022-JP to UCS4.  */
diff --git a/iconvdata/iso-2022-kr.c b/iconvdata/iso-2022-kr.c
new file mode 100644 (file)
index 0000000..16e656a
--- /dev/null
@@ -0,0 +1,273 @@
+/* Conversion module for ISO-2022-KR.
+   Copyright (C) 1998 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <gconv.h>
+#include <stdint.h>
+#include <string.h>
+#include "ksc5601.h"
+
+#define ESC    0x1b    /* Escape; starts the `Esc $ ) C' header.  */
+#define SI     0x0f    /* Shift In control byte.  */
+#define SO     0x0e    /* Shift Out control byte.  */
+
+/* Definitions used in the body of the `gconv' function.  */
+#define DEFINE_INIT            1
+#define DEFINE_FINI            1
+#define FROM_LOOP              from_iso2022kr_loop
+#define TO_LOOP                        to_iso2022kr_loop
+#define MIN_NEEDED_FROM                1       /* Bytes, ISO-2022-KR side.  */
+#define MAX_NEEDED_FROM                3
+#define MIN_NEEDED_TO          4       /* Bytes, one uint32_t value.  */
+#define MAX_NEEDED_TO          4
+#define PREPARE_LOOP \
+  int save_set; /* Written and read back by SAVE_RESET_STATE.  */            \
+  int set = data->statep->count;
+
+/* The currently selected character set is remembered in the COUNT
+   element of the conversion state.  It takes one of these values:  */
+enum
+{
+  ASCII_set = 0,       /* Plain ASCII.  */
+  KSC5601_set = 1      /* KS C 5601.  */
+};
+
+
+/* ISO-2022-KR is stateful, so flushing must bring the shift state back to
+   the initial one (zero).  The macro uses `data', `step', `status' and
+   `set' from the place where it is expanded.  */
+#define EMIT_SHIFT_TO_INIT \
+  if (data->statep->count != 0)                                                      \
+    {                                                                        \
+      if (step->data == &from_object)                                        \
+       /* Input-side state: there is nothing to emit, the state is just      \
+          reset.  */                                                         \
+       set = 0;                                                              \
+      else                                                                   \
+       {                                                                     \
+         char *outbuf = data->outbuf;                                        \
+                                                                             \
+         /* Leave the non-initial state by emitting `SO'.  NOTE(review):     \
+            RFC 1557 returns to ASCII with SI, not SO -- confirm.  */        \
+         if (outbuf == data->outbufend)                                      \
+           /* No room for even one byte; report it via STATUS.  */           \
+           status = GCONV_FULL_OUTPUT;                                       \
+         else                                                                \
+           {                                                                 \
+             /* One byte is the whole shift sequence.  */                    \
+             *outbuf++ = SO;                                                 \
+             data->outbuf = outbuf;                                          \
+             set = 0;                                                        \
+           }                                                                 \
+       }                                                                     \
+    }
+
+
+/* The input pointer may have to be reset, and the selected character set
+   has to follow it.  A nonzero SAVE stores SET in SAVE_SET, a zero SAVE
+   puts the stored value back into SET.  */
+#define SAVE_RESET_STATE(Save) \
+  ((Save)                                                                    \
+   ? (void) (save_set = set)                                                 \
+   : (void) (set = save_set))
+
+
+/* First define the conversion function from ISO-2022-KR to UCS4.  */
+#define MIN_NEEDED_INPUT       MIN_NEEDED_FROM
+#define MAX_NEEDED_INPUT       MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT      MIN_NEEDED_TO
+#define LOOPFCT                FROM_LOOP
+#define BODY \
+  {                                                                          \
+    uint32_t ch = *inptr;                                                    \
+                                                                             \
+    /* This is a 7bit character set, disallow all 8bit characters.  */       \
+    if (ch > 0x7f)                                                           \
+      {                                                                      \
+        result = GCONV_ILLEGAL_INPUT;                                        \
+        break;                                                               \
+      }                                                                      \
+                                                                             \
+    /* Recognize escape sequences.  */                                       \
+    if (ch == ESC)                                                           \
+      {                                                                      \
+        /* All switching is done with the SI and SO bytes, so escape         \
+           sequences need no real handling.  But `Esc $ ) C' flags this      \
+           encoding and is skipped.  Every byte looked at must lie below     \
+           INEND; a header cut off by the buffer end asks for more input.  */\
+        if (inptr + 1 >= inend                                               \
+            || (inptr[1] == '$'                                              \
+                && (inptr + 2 >= inend                                       \
+                    || (inptr[2] == ')' && inptr + 3 >= inend))))            \
+          {                                                                  \
+            result = GCONV_EMPTY_INPUT;                                      \
+            break;                                                           \
+          }                                                                  \
+        if (inptr[1] == '$' && inptr[2] == ')' && inptr[3] == 'C')           \
+          {                                                                  \
+            /* Yeah, yeah, we know this is ISO 2022-KR.  */                  \
+            inptr += 4;                                                      \
+            continue;                                                        \
+          }                                                                  \
+      }                                                                      \
+    else if (ch == SI)                                                       \
+      {                                                                      \
+        /* Switch to use KSC.  NOTE(review): RFC 1557 uses SO here.  */      \
+        ++inptr;                                                             \
+        set = KSC5601_set;                                                   \
+        continue;                                                            \
+      }                                                                      \
+    else if (ch == SO)                                                       \
+      {                                                                      \
+        /* Switch to use ASCII.  NOTE(review): RFC 1557 uses SI here.  */    \
+        ++inptr;                                                             \
+        set = ASCII_set;                                                     \
+        continue;                                                            \
+      }                                                                      \
+                                                                             \
+    if (set == ASCII_set || ch < 0x21 || ch == 0x7f)                         \
+      /* Almost done, just advance the input pointer.  */                    \
+      ++inptr;                                                               \
+    else                                                                     \
+      {                                                                      \
+        assert (set == KSC5601_set);                                         \
+                                                                             \
+        /* Use the KSC 5601 table.  */                                       \
+        ch = ksc5601_to_ucs4 (&inptr,                                        \
+                              NEED_LENGTH_TEST ? inend - inptr : 2, 0);      \
+                                                                             \
+        if (NEED_LENGTH_TEST && ch == 0)                                     \
+          {                                                                  \
+            result = GCONV_EMPTY_INPUT;                                      \
+            break;                                                           \
+          }                                                                  \
+        else if (ch == UNKNOWN_10646_CHAR)                                   \
+          {                                                                  \
+            result = GCONV_ILLEGAL_INPUT;                                    \
+            break;                                                           \
+          }                                                                  \
+      }                                                                      \
+                                                                             \
+    *((uint32_t *) outptr) = ch;                                             \
+    outptr += sizeof (uint32_t);                                             \
+  }
+#define EXTRA_LOOP_DECLS       , int set
+#include <iconv/loop.c>
+
+
+/* Next, define the other direction.  */
+#define MIN_NEEDED_INPUT       MIN_NEEDED_TO
+#define MIN_NEEDED_OUTPUT      MIN_NEEDED_FROM
+#define MAX_NEEDED_OUTPUT      MAX_NEEDED_FROM
+#define LOOPFCT                TO_LOOP
+#define BODY \
+  {                                                                          \
+    /* Keep the full 32-bit value: narrowing it to a byte would make         \
+       characters above 0xff look like 8bit ones.  */                        \
+    uint32_t ch = *((uint32_t *) inptr);                                     \
+                                                                             \
+    if (ch <= 0x7f)                                                          \
+      {                                                                      \
+        /* Control characters can be written in any state.  NUL and the      \
+           printable characters need the ASCII set; in particular we must    \
+           be in the initial state whenever a NUL byte is written.  */       \
+        if (set != ASCII_set && (ch == 0 || (ch >= 0x21 && ch != 0x7f)))     \
+          {                                                                  \
+            /* NOTE(review): RFC 1557 selects ASCII with SI; SO is kept      \
+               here to agree with the decoder and EMIT_SHIFT_TO_INIT.  */    \
+            *outptr++ = SO;                                                  \
+            set = ASCII_set;                                                 \
+                                                                             \
+            if (NEED_LENGTH_TEST && outptr == outend)                        \
+              {                                                              \
+                result = GCONV_FULL_OUTPUT;                                  \
+                break;                                                       \
+              }                                                              \
+          }                                                                  \
+                                                                             \
+        *outptr++ = ch;                                                      \
+      }                                                                      \
+    else                                                                     \
+      {                                                                      \
+        /* Everything else has to come from KS C 5601.  Convert behind the   \
+           place of a possibly needed shift byte so that nothing is          \
+           committed to the output when the character is unknown.  */        \
+        size_t shift = set == KSC5601_set ? 0 : 1;                           \
+        size_t written;                                                      \
+                                                                             \
+        if (NEED_LENGTH_TEST && outptr + shift + 2 > outend)                 \
+          {                                                                  \
+            result = GCONV_FULL_OUTPUT;                                      \
+            break;                                                           \
+          }                                                                  \
+                                                                             \
+        written = ucs4_to_ksc5601 (ch, outptr + shift, 2);                   \
+        if (written == UNKNOWN_10646_CHAR)                                   \
+          {                                                                  \
+            /* Not representable in this encoding at all.  */                \
+            result = GCONV_ILLEGAL_INPUT;                                    \
+            break;                                                           \
+          }                                                                  \
+                                                                             \
+        if (shift != 0)                                                      \
+          {                                                                  \
+            /* We use KSC 5601 from now on.  */                              \
+            *outptr = SI;                                                    \
+            set = KSC5601_set;                                               \
+          }                                                                  \
+        outptr += shift + written;                                           \
+      }                                                                      \
+                                                                             \
+    /* Now that we wrote the output increment the input pointer.  */         \
+    inptr += 4;                                                              \
+  }
+#define EXTRA_LOOP_DECLS       , int set
+#include <iconv/loop.c>
+
+
+/* Now define the toplevel functions.  */
+#include <iconv/skeleton.c>