/* Mapping tables for SJIS handling.
- Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1997-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ Lesser General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <dlfcn.h>
#include <stdint.h>
#include <wchar.h>
-static const uint32_t halfkana_to_ucs4[] =
-{
- 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67, 0xff68,
- 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff70,
- 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78,
- 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80,
- 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88,
- 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90,
- 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98,
- 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0xff9e, 0xff9f
-};
-
/* The following table can be generated from the file
unix/mappings/eastasia/jis/shiftjis.txt
The only problem is that the shiftjis.txt file does not contain the
mapping for the characters 0x00 to 0x1f. We add them ourself.
+
+ A much bigger problem is the mapping of the tilde and the backslash
+ characters. There are no such characters in SJIS. The proposed
+ handling is to simply map the two input values to the corresponding
+ byte values of ASCII. Things fall more or less in place this way.
*/
static const char from_ucs4_lat1[0xf8][2] =
{
[0x0051] = "\x51\x00", [0x0052] = "\x52\x00", [0x0053] = "\x53\x00",
[0x0054] = "\x54\x00", [0x0055] = "\x55\x00", [0x0056] = "\x56\x00",
[0x0057] = "\x57\x00", [0x0058] = "\x58\x00", [0x0059] = "\x59\x00",
- [0x005a] = "\x5a\x00", [0x005b] = "\x5b\x00", [0x005c] = "\x81\x5c",
+ [0x005a] = "\x5a\x00", [0x005b] = "\x5b\x00", [0x005c] = "\x5c\x00",
[0x005d] = "\x5d\x00", [0x005e] = "\x5e\x00", [0x005f] = "\x5f\x00",
[0x0060] = "\x60\x00", [0x0061] = "\x61\x00", [0x0062] = "\x62\x00",
[0x0063] = "\x63\x00", [0x0064] = "\x64\x00", [0x0065] = "\x65\x00",
[0x0075] = "\x75\x00", [0x0076] = "\x76\x00", [0x0077] = "\x77\x00",
[0x0078] = "\x78\x00", [0x0079] = "\x79\x00", [0x007a] = "\x7a\x00",
[0x007b] = "\x7b\x00", [0x007c] = "\x7c\x00", [0x007d] = "\x7d\x00",
- [0x007e] = "\x00\x00",
+ [0x007e] = "\x7e\x00", [0x007f] = "\x7f\x00",
[0x00a2] = "\x81\x91", [0x00a3] = "\x81\x92", [0x00a5] = "\x5c\x00",
[0x00a7] = "\x81\x98", [0x00a8] = "\x81\x4e", [0x00ac] = "\x81\xca",
[0x00b0] = "\x81\x8b", [0x00b1] = "\x81\x7d", [0x00b4] = "\x81\x4c",
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 2
#define MIN_NEEDED_TO 4
+#define ONE_DIRECTION 0
/* First define the conversion function from SJIS to UCS4. */
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
ch = 0x203e; \
++inptr; \
} \
- else if (ch < 0x7e) \
+ else if (ch < 0x80) \
++inptr; \
else if (ch >= 0xa1 && ch <= 0xdf) \
{ \
- ch = halfkana_to_ucs4[ch - 0xa1]; \
+ ch += 0xfec0; \
++inptr; \
} \
- else if (__builtin_expect (ch, 0) > 0xea \
+ else if (__builtin_expect (ch > 0xea, 0) \
|| __builtin_expect (ch, 0) == 0xa0 \
- || __builtin_expect (ch, 0x81) <= 0x80) \
+ || __builtin_expect (ch <= 0x80, 0)) \
{ \
/* These are illegal. */ \
- if (! ignore_errors_p ()) \
- { \
- /* This is an illegal character. */ \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++inptr; \
- ++*irreversible; \
- continue; \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
else \
{ \
- /* Two-byte character. First test whether the next character \
+ /* Two-byte character. First test whether the next byte \
is also available. */ \
uint32_t ch2; \
uint_fast32_t idx; \
\
- if (__builtin_expect (inptr + 1 >= inend, 0)) \
+ if (__glibc_unlikely (inptr + 1 >= inend)) \
{ \
- /* The second character is not available. Store \
+ /* The second byte is not available. Store \
the intermediate result. */ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
\
ch2 = inptr[1]; \
idx = ch * 256 + ch2; \
- if (__builtin_expect (ch < 0x81, 0) \
- || __builtin_expect (ch2 < 0x40, 0) \
- || (__builtin_expect (idx, 0x8140) > 0x84be && idx < 0x889f) \
- || (__builtin_expect (idx, 0x8140) > 0x88fc && idx < 0x8940) \
- || (__builtin_expect (idx, 0x8140) > 0x9ffc && idx < 0xe040) \
- || __builtin_expect (idx, 0x8140) > 0xeaa4) \
+ if (__glibc_unlikely (ch2 < 0x40)) \
{ \
/* This is illegal. */ \
- if (! ignore_errors_p ()) \
- { \
- /* This is an illegal character. */ \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
- } \
- \
- ++inptr; \
- ++*irreversible; \
- continue; \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
+ } \
+ else if ((__builtin_expect (idx > 0x84be && idx < 0x889f, 0)) \
+ || (__builtin_expect (idx > 0x88fc && idx < 0x8940, 0)) \
+ || (__builtin_expect (idx > 0x9ffc && idx < 0xe040, 0)) \
+ || __builtin_expect (idx > 0xeaa4, 0)) \
+ { \
+ /* This is illegal. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
} \
else \
{ \
else \
ch = cjk_block4[(ch - 0xe0) * 192 + ch2 - 0x40]; \
\
- inptr += 2; \
- } \
- \
- if (__builtin_expect (ch, 1) == 0) \
- { \
- /* This is an illegal character. */ \
- if (! ignore_errors_p ()) \
+ if (__glibc_unlikely (ch == 0)) \
{ \
- /* This is an illegal character. */ \
- result = __GCONV_ILLEGAL_INPUT; \
- break; \
+ /* This is an illegal character. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
} \
\
inptr += 2; \
- ++*irreversible; \
- continue; \
} \
} \
\
outptr += 4; \
}
#define LOOP_NEED_FLAGS
+#define ONEBYTE_BODY /* Maps the single input byte C to a value or WEOF. */ \
+ { \
+ if (c < 0x80) \
+ { \
+ if (c == 0x5c) /* Byte 0x5c yields 0xa5, not itself. */ \
+ return 0xa5; \
+ if (c == 0x7e) /* Byte 0x7e yields 0x203e, not itself. */ \
+ return 0x203e; \
+ return c; /* Every other byte below 0x80 is returned unchanged. */ \
+ } \
+ if (c >= 0xa1 && c <= 0xdf) /* 0xa1..0xdf become 0xff61..0xff9f. */ \
+ return 0xfec0 + c; \
+ return WEOF; /* No single-byte result for any other value. */ \
+ }
#include <iconv/loop.c>
cp = from_ucs4_greek[ch - 0x391]; \
else if (ch >= 0x2010 && ch <= 0x9fa0) \
cp = from_ucs4_cjk[ch - 0x02010]; \
- else if (__builtin_expect (ch, 0xff01) >= 0xff01 \
- && __builtin_expect (ch, 0xff01) <= 0xffef) \
+ else if (__builtin_expect (ch >= 0xff01, 1) \
+ && __builtin_expect (ch <= 0xffef, 1)) \
cp = from_ucs4_extra[ch - 0xff00]; \
else \
- /* Illegal character. */ \
- cp = ""; \
+ { \
+ UNICODE_TAG_HANDLER (ch, 4); \
+ /* Illegal character. */ \
+ cp = ""; \
+ } \
} \
else \
cp = from_ucs4_lat1[ch]; \
\
- if (__builtin_expect (cp[0], '\1') == '\0' && ch != 0) \
+ if (__builtin_expect (cp[0] == '\0', 0) && ch != 0) \
{ \
/* Illegal character. */ \
- STANDARD_ERR_HANDLER (4); \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
else \
{ \
- *outptr++ = cp[0]; \
+ *outptr = cp[0]; \
/* Now test for a possible second byte and write this if possible. */\
if (cp[1] != '\0') \
{ \
- if (__builtin_expect (outptr >= outend, 0)) \
+ if (__glibc_unlikely (outptr + 1 >= outend)) \
{ \
/* The result does not fit into the buffer. */ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
- *outptr++ = cp[1]; \
+ *++outptr = cp[1]; \
} \
+ ++outptr; \
} \
\
inptr += 4; \