1 /* Simple transformation functions.
2 Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
27 #include <sys/param.h>
30 # define EILSEQ EINVAL
34 /* These are definitions used by some of the functions for handling
35 UTF-8 encoding below. */
36 static const wchar_t encoding_mask[] =
38 ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
41 static const unsigned char encoding_byte[] =
43 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
49 __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data,
50 const char *inbuf, size_t *inlen, size_t *written,
55 /* We have no stateful encoding. So we don't have to do anything
61 do_write = MIN (*inlen, data->outbufsize - data->outbufavail);
63 memcpy (data->outbuf, inbuf, do_write);
66 data->outbufavail += do_write;
69 /* ### TODO Actually, this number must be divided according to the
70 size of the input charset. I.e., if the input is in UCS4 the
71 number of copied bytes must be divided by 4. */
79 /* Convert from ISO 646-IRV to ISO 10646/UCS4. */
81 __gconv_transform_ascii_ucs4 (struct gconv_step *step,
82 struct gconv_step_data *data, const char *inbuf,
83 size_t *inlen, size_t *written, int do_flush)
85 struct gconv_step *next_step = step + 1;
86 struct gconv_step_data *next_data = data + 1;
87 gconv_fct fct = next_step->fct;
91 /* If the function is called with no input this means we have to reset
92 to the initial state. The possibly partly converted input is
96 /* Clear the state. */
97 memset (data->statep, '\0', sizeof (mbstate_t));
100 /* Call the steps down the chain if there are any. */
105 struct gconv_step *next_step = step + 1;
106 struct gconv_step_data *next_data = data + 1;
108 result = (*fct) (next_step, next_data, NULL, 0, written, 1);
110 /* Clear output buffer. */
111 data->outbufavail = 0;
116 int save_errno = errno;
122 const unsigned char *newinbuf = inbuf;
126 while (data->outbufavail + sizeof (wchar_t) <= data->outbufsize
129 if (*newinbuf > '\x7f')
131 /* This is not a valid ANSI_X3.4-1968 character. */
132 result = GCONV_ILLEGAL_INPUT;
136 /* It's a one-byte sequence. */
137 *(wchar_t *) &data->outbuf[data->outbufavail]
138 = (wchar_t) *newinbuf;
139 data->outbufavail += sizeof (wchar_t);
146 /* Remember how much we converted. */
147 do_write += cnt * sizeof (wchar_t);
150 /* Check whether an illegal character appeared. */
151 if (result != GCONV_OK)
156 /* This is the last step. */
157 result = (*inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
162 result = GCONV_EMPTY_INPUT;
164 if (data->outbufavail > 0)
166 /* Call the functions below in the chain. */
167 size_t newavail = data->outbufavail;
169 result = (*fct) (next_step, next_data, data->outbuf, &newavail,
172 /* Correct the output buffer. */
173 if (newavail != data->outbufavail && newavail > 0)
175 memmove (data->outbuf,
176 &data->outbuf[data->outbufavail - newavail],
178 data->outbufavail = newavail;
182 while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
184 __set_errno (save_errno);
187 if (written != NULL && data->is_last)
188 *written = do_write / sizeof (wchar_t);
194 /* Convert from ISO 10646/UCS4 to ISO 646-IRV. */
196 __gconv_transform_ucs4_ascii (struct gconv_step *step,
197 struct gconv_step_data *data, const char *inbuf,
198 size_t *inlen, size_t *written, int do_flush)
200 struct gconv_step *next_step = step + 1;
201 struct gconv_step_data *next_data = data + 1;
202 gconv_fct fct = next_step->fct;
206 /* If the function is called with no input this means we have to reset
207 to the initial state. The possibly partly converted input is
211 /* Clear the state. */
212 memset (data->statep, '\0', sizeof (mbstate_t));
215 /* Call the steps down the chain if there are any. */
220 struct gconv_step *next_step = step + 1;
221 struct gconv_step_data *next_data = data + 1;
223 result = (*fct) (next_step, next_data, NULL, 0, written, 1);
225 /* Clear output buffer. */
226 data->outbufavail = 0;
231 int save_errno = errno;
237 const wchar_t *newinbuf = (const wchar_t *) inbuf;
241 while (data->outbufavail < data->outbufsize
242 && cnt + sizeof (wchar_t) + 3 < *inlen)
244 if (*newinbuf < L'\0' || *newinbuf > L'\x7f')
246 /* This is not a valid ANSI_X3.4-1968 character. */
247 result = GCONV_ILLEGAL_INPUT;
251 /* It's a one-byte sequence. */
252 data->outbuf[data->outbufavail++] = (char) *newinbuf;
256 cnt += sizeof (wchar_t);
259 /* Remember how much we converted. */
260 do_write += cnt / sizeof (wchar_t);
263 /* Check whether an illegal character appeared. */
264 if (result != GCONV_OK)
269 /* This is the last step. */
270 result = (*inlen < sizeof (wchar_t)
271 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
276 result = GCONV_EMPTY_INPUT;
278 if (data->outbufavail > 0)
280 /* Call the functions below in the chain. */
281 size_t newavail = data->outbufavail;
283 result = (*fct) (next_step, next_data, data->outbuf, &newavail,
286 /* Correct the output buffer. */
287 if (newavail != data->outbufavail && newavail > 0)
289 memmove (data->outbuf,
290 &data->outbuf[data->outbufavail - newavail],
292 data->outbufavail = newavail;
296 while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
298 __set_errno (save_errno);
301 if (written != NULL && data->is_last)
309 __gconv_transform_ucs4_utf8 (struct gconv_step *step,
310 struct gconv_step_data *data, const char *inbuf,
311 size_t *inlen, size_t *written, int do_flush)
313 struct gconv_step *next_step = step + 1;
314 struct gconv_step_data *next_data = data + 1;
315 gconv_fct fct = next_step->fct;
319 /* If the function is called with no input this means we have to reset
320 to the initial state. The possibly partly converted input is
324 /* Clear the state. */
325 memset (data->statep, '\0', sizeof (mbstate_t));
328 /* Call the steps down the chain if there are any. */
333 struct gconv_step *next_step = step + 1;
334 struct gconv_step_data *next_data = data + 1;
336 result = (*fct) (next_step, next_data, NULL, 0, written, 1);
338 /* Clear output buffer. */
339 data->outbufavail = 0;
344 int save_errno = errno;
350 const wchar_t *newinbuf = (const wchar_t *) inbuf;
353 while (data->outbufavail < data->outbufsize
354 && cnt * sizeof (wchar_t) + 3 < *inlen)
356 wchar_t wc = newinbuf[cnt];
358 if (wc < 0 && wc > 0x7fffffff)
360 /* This is not a valid ISO 10646 character. */
361 result = GCONV_ILLEGAL_INPUT;
366 /* It's a one-byte sequence. */
367 data->outbuf[data->outbufavail++] = (char) wc;
373 for (step = 2; step < 6; ++step)
374 if ((wc & encoding_mask[step - 2]) == 0)
377 if (data->outbufavail + step >= data->outbufsize)
381 start = data->outbufavail;
382 data->outbufavail += step;
383 data->outbuf[start] = encoding_byte[step - 2];
387 data->outbuf[start + step] = 0x80 | (wc & 0x3f);
391 data->outbuf[start] |= wc;
397 /* Remember how much we converted. */
399 *inlen -= cnt * sizeof (wchar_t);
401 /* Check whether an illegal character appeared. */
402 if (result != GCONV_OK)
407 /* This is the last step. */
408 result = (*inlen < sizeof (wchar_t)
409 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT);
414 result = GCONV_EMPTY_INPUT;
416 if (data->outbufavail > 0)
418 /* Call the functions below in the chain. */
419 size_t newavail = data->outbufavail;
421 result = (*fct) (next_step, next_data, data->outbuf, &newavail,
424 /* Correct the output buffer. */
425 if (newavail != data->outbufavail && newavail > 0)
427 memmove (data->outbuf,
428 &data->outbuf[data->outbufavail - newavail],
430 data->outbufavail = newavail;
434 while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
436 __set_errno (save_errno);
439 if (written != NULL && data->is_last)
447 __gconv_transform_utf8_ucs4 (struct gconv_step *step,
448 struct gconv_step_data *data, const char *inbuf,
449 size_t *inlen, size_t *written, int do_flush)
451 struct gconv_step *next_step = step + 1;
452 struct gconv_step_data *next_data = data + 1;
453 gconv_fct fct = next_step->fct;
457 /* If the function is called with no input this means we have to reset
458 to the initial state. The possibly partly converted input is
462 /* Clear the state. */
463 memset (data->statep, '\0', sizeof (mbstate_t));
466 /* Call the steps down the chain if there are any. */
471 struct gconv_step *next_step = step + 1;
472 struct gconv_step_data *next_data = data + 1;
474 result = (*fct) (next_step, next_data, NULL, 0, written, 1);
479 int save_errno = errno;
486 wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
490 while (data->outbufavail + sizeof (wchar_t) <= data->outbufsize
498 /* Next input byte. */
503 /* One byte sequence. */
507 else if ((byte & 0xe0) == 0xc0)
512 else if ((byte & 0xf0) == 0xe0)
514 /* We expect three bytes. */
518 else if ((byte & 0xf8) == 0xf0)
520 /* We expect four bytes. */
524 else if ((byte & 0xfc) == 0xf8)
526 /* We expect five bytes. */
530 else if ((byte & 0xfe) == 0xfc)
532 /* We expect six bytes. */
538 /* This is an illegal encoding. */
539 result = GCONV_ILLEGAL_INPUT;
543 if (cnt + count > *inlen)
545 /* We don't have enough input. */
551 /* Read the possible remaining bytes. */
557 if ((byte & 0xc0) != 0x80)
559 /* This is an illegal encoding. */
560 result = GCONV_ILLEGAL_INPUT;
565 value |= byte & 0x3f;
568 if (result != GCONV_OK)
578 /* Remember how much we converted. */
579 do_write += actually;
582 data->outbufavail += actually * sizeof (wchar_t);
584 /* Check whether an illegal character appeared. */
585 if (result != GCONV_OK)
587 result = GCONV_ILLEGAL_INPUT;
593 /* We have an incomplete character at the end. */
594 result = GCONV_INCOMPLETE_INPUT;
600 /* This is the last step. */
601 result = (data->outbufavail + sizeof (wchar_t) > data->outbufsize
602 ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
607 result = GCONV_EMPTY_INPUT;
609 if (data->outbufavail > 0)
611 /* Call the functions below in the chain. */
612 size_t newavail = data->outbufavail;
614 result = (*fct) (next_step, next_data, data->outbuf, &newavail,
617 /* Correct the output buffer. */
618 if (newavail != data->outbufavail && newavail > 0)
620 memmove (data->outbuf,
621 &data->outbuf[data->outbufavail - newavail],
623 data->outbufavail = newavail;
627 while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
629 __set_errno (save_errno);
632 if (written != NULL && data->is_last)
640 __gconv_transform_ucs2_ucs4 (struct gconv_step *step,
641 struct gconv_step_data *data, const char *inbuf,
642 size_t *inlen, size_t *written, int do_flush)
644 struct gconv_step *next_step = step + 1;
645 struct gconv_step_data *next_data = data + 1;
646 gconv_fct fct = next_step->fct;
650 /* If the function is called with no input this means we have to reset
651 to the initial state. The possibly partly converted input is
655 /* Clear the state. */
656 memset (data->statep, '\0', sizeof (mbstate_t));
659 /* Call the steps down the chain if there are any. */
664 struct gconv_step *next_step = step + 1;
665 struct gconv_step_data *next_data = data + 1;
667 result = (*fct) (next_step, next_data, NULL, 0, written, 1);
672 int save_errno = errno;
677 const uint16_t *newinbuf = (const uint16_t *) inbuf;
678 wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
683 while (data->outbufavail + 4 <= data->outbufsize
686 outbuf[actually++] = *newinbuf++;
687 data->outbufavail += 4;
693 /* We have an incomplete input character. */
694 mbstate_t *state = data->statep;
696 state->value = *(uint8_t *) newinbuf;
700 /* Remember how much we converted. */
701 do_write += actually * sizeof (wchar_t);
703 /* Check whether an illegal character appeared. */
706 result = GCONV_ILLEGAL_INPUT;
710 if (*inlen == 0 && !__mbsinit (data->statep))
712 /* We have an incomplete character at the end. */
713 result = GCONV_INCOMPLETE_INPUT;
719 /* This is the last step. */
720 result = (data->outbufavail + sizeof (wchar_t) > data->outbufsize
721 ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
726 result = GCONV_EMPTY_INPUT;
728 if (data->outbufavail > 0)
730 /* Call the functions below in the chain. */
731 size_t newavail = data->outbufavail;
733 result = (*fct) (next_step, next_data, data->outbuf, &newavail,
736 /* Correct the output buffer. */
737 if (newavail != data->outbufavail && newavail > 0)
739 memmove (data->outbuf,
740 &data->outbuf[data->outbufavail - newavail],
742 data->outbufavail = newavail;
746 while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
748 __set_errno (save_errno);
751 if (written != NULL && data->is_last)
759 __gconv_transform_ucs4_ucs2 (struct gconv_step *step,
760 struct gconv_step_data *data, const char *inbuf,
761 size_t *inlen, size_t *written, int do_flush)
763 struct gconv_step *next_step = step + 1;
764 struct gconv_step_data *next_data = data + 1;
765 gconv_fct fct = next_step->fct;
769 /* If the function is called with no input this means we have to reset
770 to the initial state. The possibly partly converted input is
774 /* Clear the state. */
775 memset (data->statep, '\0', sizeof (mbstate_t));
778 /* Call the steps down the chain if there are any. */
783 struct gconv_step *next_step = step + 1;
784 struct gconv_step_data *next_data = data + 1;
786 result = (*fct) (next_step, next_data, NULL, 0, written, 1);
788 /* Clear output buffer. */
789 data->outbufavail = 0;
794 int save_errno = errno;
799 const wchar_t *newinbuf = (const wchar_t *) inbuf;
800 uint16_t *outbuf = (uint16_t *) &data->outbuf[data->outbufavail];
805 while (data->outbufavail + 2 <= data->outbufsize
808 if (*newinbuf >= 0x10000)
810 __set_errno (EILSEQ);
813 outbuf[actually++] = (wchar_t) *newinbuf;
815 data->outbufavail += 2;
820 /* We have an incomplete input character. */
821 mbstate_t *state = data->statep;
822 state->count = *inlen;
827 state->value += *(uint8_t *) newinbuf;
832 /* Remember how much we converted. */
833 do_write += (const char *) newinbuf - inbuf;
835 /* Check whether an illegal character appeared. */
838 result = GCONV_ILLEGAL_INPUT;
842 if (*inlen == 0 && !__mbsinit (data->statep))
844 /* We have an incomplete character at the end. */
845 result = GCONV_INCOMPLETE_INPUT;
851 /* This is the last step. */
852 result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
857 result = GCONV_EMPTY_INPUT;
859 if (data->outbufavail > 0)
861 /* Call the functions below in the chain. */
862 size_t newavail = data->outbufavail;
864 result = (*fct) (next_step, next_data, data->outbuf, &newavail,
867 /* Correct the output buffer. */
868 if (newavail != data->outbufavail && newavail > 0)
870 memmove (data->outbuf,
871 &data->outbuf[data->outbufavail - newavail],
873 data->outbufavail = newavail;
877 while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
879 __set_errno (save_errno);
882 if (written != NULL && data->is_last)
883 *written = do_write / sizeof (wchar_t);