/* Mapping tables for SJIS handling.
- Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1997-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ Lesser General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+#include <dlfcn.h>
#include <stdint.h>
#include <wchar.h>
-static const uint32_t halfkana_to_ucs4[] =
-{
- 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67, 0xff68,
- 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff70,
- 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78,
- 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80,
- 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88,
- 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90,
- 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98,
- 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0xff9e, 0xff9f
-};
-
/* The following table can be generated from the file
unix/mappings/eastasia/jis/shiftjis.txt
The only problem is that the shiftjis.txt file does not contain the
mapping for the characters 0x00 to 0x1f. We add them ourselves.
+
+ A much bigger problem is the mapping of the tilde and the backslash
+ characters. There are no such characters in SJIS. The proposed
+ handling is to simply map the two input values to the corresponding
+ byte values of ASCII. Things fall more or less in place this way.
*/
static const char from_ucs4_lat1[0xf8][2] =
{
[0x0075] = "\x75\x00", [0x0076] = "\x76\x00", [0x0077] = "\x77\x00",
[0x0078] = "\x78\x00", [0x0079] = "\x79\x00", [0x007a] = "\x7a\x00",
[0x007b] = "\x7b\x00", [0x007c] = "\x7c\x00", [0x007d] = "\x7d\x00",
- [0x007e] = "\x7e\x00",
+ [0x007e] = "\x7e\x00", [0x007f] = "\x7f\x00",
[0x00a2] = "\x81\x91", [0x00a3] = "\x81\x92", [0x00a5] = "\x5c\x00",
[0x00a7] = "\x81\x98", [0x00a8] = "\x81\x4e", [0x00ac] = "\x81\xca",
[0x00b0] = "\x81\x8b", [0x00b1] = "\x81\x7d", [0x00b4] = "\x81\x4c",
[0x0055] = "\x82\x95", [0x0056] = "\x82\x96", [0x0057] = "\x82\x97",
[0x0058] = "\x82\x98", [0x0059] = "\x82\x99", [0x005a] = "\x82\x9a",
[0x005b] = "\x81\x6f", [0x005c] = "\x81\x62", [0x005d] = "\x81\x70",
- [0x005e] = "\x00\x00", [0x005f] = "\x00\x00",
+ [0x005e] = "\x00\x00", [0x005f] = "\x00\x00",
[0x0060] = "\x00\x00", [0x0061] = "\xa1\x00", [0x0062] = "\xa2\x00",
[0x0063] = "\xa3\x00", [0x0064] = "\xa4\x00", [0x0065] = "\xa5\x00",
[0x0066] = "\xa6\x00", [0x0067] = "\xa7\x00", [0x0068] = "\xa8\x00",
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 2
#define MIN_NEEDED_TO 4
+#define ONE_DIRECTION 0
/* First define the conversion function from SJIS to UCS4. */
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
{ \
uint32_t ch = *inptr; \
\
- if (ch == 0x5c) \
+ if (__builtin_expect (ch, 0) == 0x5c) \
{ \
ch = 0xa5; \
++inptr; \
} \
- else if (ch == 0x7e) \
+ else if (__builtin_expect (ch, 0) == 0x7e) \
{ \
ch = 0x203e; \
++inptr; \
} \
- else if (ch < 0x7e) \
+ else if (ch < 0x80) \
++inptr; \
else if (ch >= 0xa1 && ch <= 0xdf) \
{ \
- ch = halfkana_to_ucs4[ch - 0xa1]; \
+ ch += 0xfec0; \
++inptr; \
} \
- else if (ch > 0xea || ch == 0xa0 || ch == 0x7f || ch == 0x80) \
+ else if (__builtin_expect (ch > 0xea, 0) \
+ || __builtin_expect (ch, 0) == 0xa0 \
+ || __builtin_expect (ch <= 0x80, 0)) \
{ \
/* These are illegal. */ \
- result = GCONV_ILLEGAL_INPUT; \
- break; \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
else \
{ \
- /* Two-byte character. First test whether the next character \
+ /* Two-byte character. First test whether the next byte \
is also available. */ \
uint32_t ch2; \
uint_fast32_t idx; \
\
- if (NEED_LENGTH_TEST && inptr + 1 >= inend) \
+ if (__glibc_unlikely (inptr + 1 >= inend)) \
{ \
- /* The second character is not available. Store \
+ /* The second byte is not available. Store \
the intermediate result. */ \
- result = GCONV_INCOMPLETE_INPUT; \
+ result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
\
ch2 = inptr[1]; \
idx = ch * 256 + ch2; \
- if (idx < 0x8140 || (idx > 0x84be && idx < 0x889f) \
- || (idx > 0x88fc && idx < 0x8940) \
- || (idx > 0x9ffc && idx < 0xe040) || idx > 0xeaa4) \
+ if (__glibc_unlikely (ch2 < 0x40)) \
{ \
/* This is illegal. */ \
- result = GCONV_ILLEGAL_INPUT; \
- break; \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
+ } \
+ else if ((__builtin_expect (idx > 0x84be && idx < 0x889f, 0)) \
+ || (__builtin_expect (idx > 0x88fc && idx < 0x8940, 0)) \
+ || (__builtin_expect (idx > 0x9ffc && idx < 0xe040, 0)) \
+ || __builtin_expect (idx > 0xeaa4, 0)) \
+ { \
+ /* This is illegal. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
} \
else \
{ \
else \
ch = cjk_block4[(ch - 0xe0) * 192 + ch2 - 0x40]; \
\
- inptr += 2; \
- } \
+ if (__glibc_unlikely (ch == 0)) \
+ { \
+ /* This is an illegal character. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
+ } \
\
- if (ch == 0) \
- { \
- /* This is an illegal character. */ \
- result = GCONV_ILLEGAL_INPUT; \
- break; \
+ inptr += 2; \
} \
} \
\
- *((uint32_t *) outptr)++ = ch; \
+ put32 (outptr, ch); \
+ outptr += 4; \
+ }
+#define LOOP_NEED_FLAGS
+#define ONEBYTE_BODY /* Maps one input byte `c' to a single value, or WEOF. */ \
+ { \
+ if (c < 0x80) /* Bytes below 0x80 map to themselves, with two exceptions. */ \
+ { \
+ if (c == 0x5c) /* Byte 0x5c is not passed through unchanged ... */ \
+ return 0xa5; /* ... it yields 0xa5 instead. */ \
+ if (c == 0x7e) /* Likewise byte 0x7e ... */ \
+ return 0x203e; /* ... yields 0x203e. */ \
+ return c; /* Every other byte below 0x80 is returned as is. */ \
+ } \
+ if (c >= 0xa1 && c <= 0xdf) /* Single-byte range 0xa1..0xdf. */ \
+ return 0xfec0 + c; /* Fixed offset: results are 0xff61..0xff9f. */ \
+ return WEOF; /* Any remaining byte has no single-byte result here. */ \
}
#include <iconv/loop.c>
#define LOOPFCT TO_LOOP
#define BODY \
{ \
- uint32_t ch = *((uint32_t *) inptr); \
+ uint32_t ch = get32 (inptr); \
const char *cp; \
\
if (ch >= (sizeof (from_ucs4_lat1) / sizeof (from_ucs4_lat1[0]))) \
cp = from_ucs4_greek[ch - 0x391]; \
else if (ch >= 0x2010 && ch <= 0x9fa0) \
cp = from_ucs4_cjk[ch - 0x02010]; \
- else if (ch >= 0xff01 && ch <= 0xffef) \
+ else if (__builtin_expect (ch >= 0xff01, 1) \
+ && __builtin_expect (ch <= 0xffef, 1)) \
cp = from_ucs4_extra[ch - 0xff00]; \
else \
{ \
+ UNICODE_TAG_HANDLER (ch, 4); \
/* Illegal character. */ \
- result = GCONV_ILLEGAL_INPUT; \
- break; \
+ cp = ""; \
} \
} \
else \
cp = from_ucs4_lat1[ch]; \
\
- if (cp[0] == '\0' && ch != 0) \
+ if (__builtin_expect (cp[0] == '\0', 0) && ch != 0) \
{ \
/* Illegal character. */ \
- result = GCONV_ILLEGAL_INPUT; \
- break; \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
- \
- *outptr++ = cp[0]; \
- /* Now test for a possible second byte and write this if possible. */ \
- if (cp[1] != '\0') \
+ else \
{ \
- if (NEED_LENGTH_TEST && outptr >= outend) \
+ *outptr = cp[0]; \
+ /* Now test for a possible second byte and write this if possible. */\
+ if (cp[1] != '\0') \
{ \
- /* The result does not fit into the buffer. */ \
- result = GCONV_FULL_OUTPUT; \
- break; \
+ if (__glibc_unlikely (outptr + 1 >= outend)) \
+ { \
+ /* The result does not fit into the buffer. */ \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ *++outptr = cp[1]; \
} \
- *outptr++ = cp[1]; \
+ ++outptr; \
} \
\
inptr += 4; \
}
+#define LOOP_NEED_FLAGS
#include <iconv/loop.c>