packaging: Enable memcpy in sysdeps/arm/memcpy.S for ARM
[platform/upstream/glibc.git] / iconv / gconv_simple.c
1 /* Simple transformations functions.
2    Copyright (C) 1997-2024 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18
19 #include <byteswap.h>
20 #include <dlfcn.h>
21 #include <endian.h>
22 #include <errno.h>
23 #include <gconv.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <wchar.h>
28 #include <sys/param.h>
29 #include <gconv_int.h>
30
31 #define BUILTIN_ALIAS(s1, s2) /* nothing */
32 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
33                                MinF, MaxF, MinT, MaxT) \
34   extern int Fct (struct __gconv_step *, struct __gconv_step_data *,          \
35                   const unsigned char **, const unsigned char *,              \
36                   unsigned char **, size_t *, int, int);
37 #include "gconv_builtin.h"
38
39
40 #ifndef EILSEQ
41 # define EILSEQ EINVAL
42 #endif
43
44
45 /* Specialized conversion function for a single byte to INTERNAL, recognizing
46    only ASCII characters.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  /* Only the 7-bit range maps to a wide character; STEP is not consulted.  */
  return c < 0x80 ? (wint_t) c : WEOF;
}
55
56
57 /* Transform from the internal, UCS4-like format, to UCS4.  The
58    difference between the internal ucs4 format and the real UCS4
59    format is, if any, the endianness.  The Unicode/ISO 10646 says that
60    unless some higher protocol specifies it differently, the byte
61    order is big endian.*/
62 #define DEFINE_INIT             0
63 #define DEFINE_FINI             0
64 #define MIN_NEEDED_FROM         4
65 #define MIN_NEEDED_TO           4
66 #define FROM_DIRECTION          1
67 #define FROM_LOOP               internal_ucs4_loop
68 #define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
69 #define FUNCTION_NAME           __gconv_transform_internal_ucs4
70 #define ONE_DIRECTION           0
71
72
/* Bulk conversion loop INTERNAL -> UCS4.  Converts as many complete
   4-byte units as fit into both the input and the output range, stores
   the advanced positions through INPTRP and OUTPTRP and returns the
   matching __GCONV_* status.  STEP, STEP_DATA and IRREVERSIBLE are not
   used here.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole 4-byte units both buffers can accommodate.  */
  size_t n_units = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Every unit has to be byte-swapped to obtain big endian output.  */
  size_t left = n_units;

  while (left > 0)
    {
      uint32_t unit = get32 (src);
      put32 (dst, __builtin_bswap32 (unit));
      src += 4;
      dst += 4;
      --left;
    }

  *inptrp = src;
  *outptrp = dst;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Byte order already matches, a plain copy is sufficient.  */
  *inptrp = src + n_units * 4;
  *outptrp = __mempcpy (dst, src, n_units * 4);
#else
# error "This endianness is not supported."
#endif

  /* Report why the loop stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;

  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;

  return __GCONV_INCOMPLETE_INPUT;
}
116
117
/* Single-character variant of the INTERNAL -> UCS4 conversion which
   works through the conversion state.  Bytes left over from an earlier
   call are kept in the state's __wchb buffer, their number in the low
   three bits of __count.  The buffer is topped up from the input; once
   four bytes are available one character is written in big endian
   order and the byte count in the state is cleared.  Returns
   __GCONV_INCOMPLETE_INPUT while fewer than four bytes are available,
   __GCONV_OK otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst;
  size_t i;

  /* Pull input bytes into the state until one unit is complete.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still short: remember how many bytes are buffered, keeping the
         remaining bits of __count as they are.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the bytes on output.  */
  for (i = 0; i < 4; ++i)
    dst[i] = state->__value.__wchb[3 - i];
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Same byte order, copy straight through.  */
  for (i = 0; i < 4; ++i)
    dst[i] = state->__value.__wchb[i];
#else
# error "This endianness is not supported."
#endif
  *outptrp = dst + 4;

  /* Nothing is buffered any more.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
165
166 #include <iconv/skeleton.c>
167
168
169 /* Transform from UCS4 to the internal, UCS4-like format.  Unlike
170    for the other direction we have to check for correct values here.  */
171 #define DEFINE_INIT             0
172 #define DEFINE_FINI             0
173 #define MIN_NEEDED_FROM         4
174 #define MIN_NEEDED_TO           4
175 #define FROM_DIRECTION          1
176 #define FROM_LOOP               ucs4_internal_loop
177 #define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
178 #define FUNCTION_NAME           __gconv_transform_ucs4_internal
179 #define ONE_DIRECTION           0
180
181
/* Bulk conversion loop UCS4 (big endian) -> INTERNAL.  Each complete
   4-byte unit is read, brought into host order and range-checked before
   it is stored.  The advanced positions are written back through INPTRP
   and OUTPTRP, except on the early return taken when IRREVERSIBLE is
   NULL.  Returns a __GCONV_* status.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const int flags = step_data->__flags;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  while (src + 4 <= inend && dst + 4 <= outend)
    {
      uint32_t ucs = get32 (src);
#if __BYTE_ORDER == __LITTLE_ENDIAN
      ucs = __builtin_bswap32 (ucs);
#endif

      if (__glibc_unlikely (ucs > 0x7fffffff))
        {
          /* Out of range for UCS4.  No transliteration is attempted:
             the problem is not a target that cannot represent the
             character, the input itself is invalid.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (!(flags & __GCONV_IGNORE_ERRORS))
            {
              *inptrp = src;
              *outptrp = dst;
              return __GCONV_ILLEGAL_INPUT;
            }

          /* Errors are ignored: count the character and drop it.  */
          ++*irreversible;
        }
      else
        {
          put32 (dst, ucs);
          dst += sizeof (uint32_t);
        }

      src += 4;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Report why the loop stopped.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;

  if (dst + 4 > outend)
    return __GCONV_FULL_OUTPUT;

  return __GCONV_INCOMPLETE_INPUT;
}
241
242
/* Single-character variant of the UCS4 (big endian) -> INTERNAL
   conversion which works through the conversion state.  Bytes left over
   from an earlier call are kept in the state's __wchb buffer, their
   number in the low three bits of __count.

   Returns __GCONV_INCOMPLETE_INPUT while fewer than four bytes are
   available, __GCONV_ILLEGAL_INPUT for a value above 0x7fffffff unless
   __GCONV_IGNORE_ERRORS is set in the step flags (in which case the
   character is dropped), and __GCONV_OK otherwise.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  /* Top the state buffer up to one complete character.  */
  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The input is big endian, so byte 0 is the most significant one.
     Any value with the top bit set, i.e. a first byte of 0x80 or more,
     exceeds 0x7fffffff.  This is the same limit the bulk loop applies.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input during this call;
             state->__count still holds the number buffered before.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
304
305 #include <iconv/skeleton.c>
306
307
308 /* Similarly for the little endian form.  */
309 #define DEFINE_INIT             0
310 #define DEFINE_FINI             0
311 #define MIN_NEEDED_FROM         4
312 #define MIN_NEEDED_TO           4
313 #define FROM_DIRECTION          1
314 #define FROM_LOOP               internal_ucs4le_loop
315 #define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
316 #define FUNCTION_NAME           __gconv_transform_internal_ucs4le
317 #define ONE_DIRECTION           0
318
319
/* Bulk conversion loop INTERNAL -> UCS4-LE.  Converts as many complete
   4-byte units as fit into both the input and the output range, stores
   the advanced positions through INPTRP and OUTPTRP and returns the
   matching __GCONV_* status.  STEP, STEP_DATA and IRREVERSIBLE are not
   used here.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole 4-byte units both buffers can accommodate.  */
  size_t n_units = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Every unit has to be byte-swapped to obtain little endian output.  */
  size_t left = n_units;

  while (left > 0)
    {
      uint32_t unit = get32 (src);
      put32 (dst, __builtin_bswap32 (unit));
      src += 4;
      dst += 4;
      --left;
    }

  *inptrp = src;
  *outptrp = dst;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Byte order already matches, a plain copy is sufficient.  */
  *inptrp = src + n_units * 4;
  *outptrp = __mempcpy (dst, src, n_units * 4);
#else
# error "This endianness is not supported."
#endif

  /* Report why the loop stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;

  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;

  return __GCONV_INCOMPLETE_INPUT;
}
363
364
/* Single-character variant of the INTERNAL -> UCS4-LE conversion which
   works through the conversion state.  Bytes left over from an earlier
   call are kept in the state's __wchb buffer, their number in the low
   three bits of __count.  Once four bytes are available one character
   is written in little endian order and the byte count is cleared.
   Returns __GCONV_INCOMPLETE_INPUT while fewer than four bytes are
   available, __GCONV_OK otherwise.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  size_t have = state->__count & 7;
  unsigned char *dst;
  size_t i;

  /* Pull input bytes into the state until one unit is complete.  */
  for (; have < 4 && *inptrp < inend; ++have)
    state->__value.__wchb[have] = *(*inptrp)++;

  if (__glibc_unlikely (have < 4))
    {
      /* Still short: remember how many bytes are buffered, keeping the
         remaining bits of __count as they are.  */
      state->__count = (state->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  dst = *outptrp;
#if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the bytes on output.  */
  for (i = 0; i < 4; ++i)
    dst[i] = state->__value.__wchb[3 - i];
#else
  /* Same byte order, copy straight through.  */
  for (i = 0; i < 4; ++i)
    dst[i] = state->__value.__wchb[i];
#endif
  *outptrp = dst + 4;

  /* Nothing is buffered any more.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
412
413 #include <iconv/skeleton.c>
414
415
416 /* And finally from UCS4-LE to the internal encoding.  */
417 #define DEFINE_INIT             0
418 #define DEFINE_FINI             0
419 #define MIN_NEEDED_FROM         4
420 #define MIN_NEEDED_TO           4
421 #define FROM_DIRECTION          1
422 #define FROM_LOOP               ucs4le_internal_loop
423 #define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
424 #define FUNCTION_NAME           __gconv_transform_ucs4le_internal
425 #define ONE_DIRECTION           0
426
427
/* Bulk conversion loop UCS4-LE -> INTERNAL.  Each complete 4-byte unit
   is read, brought into host order and range-checked before it is
   stored.  The advanced positions are written back through INPTRP and
   OUTPTRP, except on the early return taken when IRREVERSIBLE is NULL.
   Returns a __GCONV_* status.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const int flags = step_data->__flags;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  while (src + 4 <= inend && dst + 4 <= outend)
    {
      uint32_t ucs = get32 (src);
#if __BYTE_ORDER == __BIG_ENDIAN
      ucs = __builtin_bswap32 (ucs);
#endif

      if (__glibc_unlikely (ucs > 0x7fffffff))
        {
          /* Out of range for UCS4.  No transliteration is attempted:
             the problem is not a target that cannot represent the
             character, the input itself is invalid.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (!(flags & __GCONV_IGNORE_ERRORS))
            {
              *inptrp = src;
              *outptrp = dst;
              return __GCONV_ILLEGAL_INPUT;
            }

          /* Errors are ignored: count the character and drop it.  */
          ++*irreversible;
        }
      else
        {
          put32 (dst, ucs);
          dst += sizeof (uint32_t);
        }

      src += 4;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Report why the loop stopped.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;

  if (src + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  /* A whole unit of input is left, so the output must be what ran out.  */
  assert (dst + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
490
491
/* Single-character variant of the UCS4-LE -> INTERNAL conversion which
   works through the conversion state.  Bytes left over from an earlier
   call are kept in the state's __wchb buffer, their number in the low
   three bits of __count.

   Returns __GCONV_INCOMPLETE_INPUT while fewer than four bytes are
   available, __GCONV_ILLEGAL_INPUT for a value above 0x7fffffff unless
   __GCONV_IGNORE_ERRORS is set in the step flags (in which case the
   character is dropped), and __GCONV_OK otherwise.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  /* Top the state buffer up to one complete character.  */
  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* The input is little endian, so byte 3 is the most significant one.
     Any value with the top bit set, i.e. a last byte of 0x80 or more,
     exceeds 0x7fffffff.  This is the same limit the bulk loop applies.  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Give back the bytes taken from the input during this call,
             as the big endian variant does; state->__count still holds
             the number buffered before.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
550
551 #include <iconv/skeleton.c>
552
553
554 /* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
555 #define DEFINE_INIT             0
556 #define DEFINE_FINI             0
557 #define MIN_NEEDED_FROM         1
558 #define MIN_NEEDED_TO           4
559 #define FROM_DIRECTION          1
560 #define FROM_LOOP               ascii_internal_loop
561 #define TO_LOOP                 ascii_internal_loop /* This is not used.  */
562 #define FUNCTION_NAME           __gconv_transform_ascii_internal
563 #define ONE_DIRECTION           1
564
565 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
566 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
567 #define LOOPFCT                 FROM_LOOP
#define BODY \
  {                                                                           \
    if (__glibc_likely (*inptr <= 0x7f))                                      \
      {                                                                       \
        /* A 7-bit byte: widen it to one 32-bit unit.  */                     \
        *((uint32_t *) outptr) = *inptr++;                                    \
        outptr += sizeof (uint32_t);                                          \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        /* Bytes above 0x7f are not ASCII.  No transliteration is tried:      \
           the problem is not a target that cannot represent the              \
           character, the input itself is invalid.  */                        \
        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                   \
      }                                                                       \
  }
585 #define LOOP_NEED_FLAGS
586 #include <iconv/loop.c>
587 #include <iconv/skeleton.c>
588
589
590 /* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
591 #define DEFINE_INIT             0
592 #define DEFINE_FINI             0
593 #define MIN_NEEDED_FROM         4
594 #define MIN_NEEDED_TO           1
595 #define FROM_DIRECTION          1
596 #define FROM_LOOP               internal_ascii_loop
597 #define TO_LOOP                 internal_ascii_loop /* This is not used.  */
598 #define FUNCTION_NAME           __gconv_transform_internal_ascii
599 #define ONE_DIRECTION           1
600
601 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
602 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
603 #define LOOPFCT                 FROM_LOOP
#define BODY \
  {                                                                           \
    /* Read the next 32-bit input unit once.  */                              \
    const uint32_t wc = *((const uint32_t *) inptr);                          \
                                                                              \
    if (__glibc_likely (wc <= 0x7f))                                          \
      {                                                                       \
        /* Fits into a single output byte.  */                                \
        *outptr++ = wc;                                                       \
        inptr += sizeof (uint32_t);                                           \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        UNICODE_TAG_HANDLER (wc, 4);                                          \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
  }
618 #define LOOP_NEED_FLAGS
619 #include <iconv/loop.c>
620 #include <iconv/skeleton.c>
621
622
623 /* Convert from the internal (UCS4-like) format to UTF-8.  */
624 #define DEFINE_INIT             0
625 #define DEFINE_FINI             0
626 #define MIN_NEEDED_FROM         4
627 #define MIN_NEEDED_TO           1
628 #define MAX_NEEDED_TO           6
629 #define FROM_DIRECTION          1
630 #define FROM_LOOP               internal_utf8_loop
631 #define TO_LOOP                 internal_utf8_loop /* This is not used.  */
632 #define FUNCTION_NAME           __gconv_transform_internal_utf8
633 #define ONE_DIRECTION           1
634
635 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
636 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
637 #define MAX_NEEDED_OUTPUT       MAX_NEEDED_TO
638 #define LOOPFCT                 FROM_LOOP
#define BODY \
  {                                                                           \
    uint32_t wc = *((const uint32_t *) inptr);                                \
                                                                              \
    if (__glibc_likely (wc < 0x80))                                           \
      /* A single byte is enough.  */                                         \
      *outptr++ = (unsigned char) wc;                                         \
    else if (__glibc_likely (wc <= 0x7fffffff                                 \
                             && (wc < 0xd800 || wc > 0xdfff)))                \
      {                                                                       \
        /* Length of the encoding: the smallest count from 2 to 5 whose       \
           payload bits hold the value, otherwise 6.  */                      \
        size_t len = 2;                                                       \
        unsigned char *first = outptr;                                        \
        size_t pos;                                                           \
                                                                              \
        while (len < 6 && (wc & (~(uint32_t)0 << (5 * len + 1))) != 0)        \
          ++len;                                                              \
                                                                              \
        if (__glibc_unlikely (outptr + len > outend))                         \
          {                                                                   \
            /* The sequence does not fit into the output buffer.  */          \
            result = __GCONV_FULL_OUTPUT;                                     \
            break;                                                            \
          }                                                                   \
                                                                              \
        /* Lead byte marker first, then the continuation bytes from the       \
           last one backwards, six payload bits each.  */                     \
        *first = (unsigned char) (~0xff >> len);                              \
        outptr += len;                                                        \
        for (pos = len - 1; pos > 0; --pos)                                   \
          {                                                                   \
            first[pos] = 0x80 | (wc & 0x3f);                                  \
            wc >>= 6;                                                         \
          }                                                                   \
        /* What is left of the value goes into the lead byte.  */             \
        first[0] |= wc;                                                       \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
                                                                              \
    inptr += 4;                                                               \
  }
681 #define LOOP_NEED_FLAGS
682 #include <iconv/loop.c>
683 #include <iconv/skeleton.c>
684
685
686 /* Convert from UTF-8 to the internal (UCS4-like) format.  */
687 #define DEFINE_INIT             0
688 #define DEFINE_FINI             0
689 #define MIN_NEEDED_FROM         1
690 #define MAX_NEEDED_FROM         6
691 #define MIN_NEEDED_TO           4
692 #define FROM_DIRECTION          1
693 #define FROM_LOOP               utf8_internal_loop
694 #define TO_LOOP                 utf8_internal_loop /* This is not used.  */
695 #define FUNCTION_NAME           __gconv_transform_utf8_internal
696 #define ONE_DIRECTION           1
697
698 #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
699 #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
700 #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
701 #define LOOPFCT                 FROM_LOOP
702 #define BODY \
703   {                                                                           \
704     /* Next input byte.  */                                                   \
705     uint32_t ch = *inptr;                                                     \
706                                                                               \
707     if (__glibc_likely (ch < 0x80))                                           \
708       {                                                                       \
709         /* One byte sequence.  */                                             \
710         ++inptr;                                                              \
711       }                                                                       \
712     else                                                                      \
713       {                                                                       \
714         unsigned int cnt;                                                     \
715         unsigned int i;                                               \
716                                                                               \
717         if (ch >= 0xc2 && ch < 0xe0)                                          \
718           {                                                                   \
719             /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
720                otherwise the wide character could have been represented       \
721                using a single byte.  */                                       \
722             cnt = 2;                                                          \
723             ch &= 0x1f;                                                       \
724           }                                                                   \
725         else if (__glibc_likely ((ch & 0xf0) == 0xe0))                        \
726           {                                                                   \
727             /* We expect three bytes.  */                                     \
728             cnt = 3;                                                          \
729             ch &= 0x0f;                                                       \
730           }                                                                   \
731         else if (__glibc_likely ((ch & 0xf8) == 0xf0))                        \
732           {                                                                   \
733             /* We expect four bytes.  */                                      \
734             cnt = 4;                                                          \
735             ch &= 0x07;                                                       \
736           }                                                                   \
737         else if (__glibc_likely ((ch & 0xfc) == 0xf8))                        \
738           {                                                                   \
739             /* We expect five bytes.  */                                      \
740             cnt = 5;                                                          \
741             ch &= 0x03;                                                       \
742           }                                                                   \
743         else if (__glibc_likely ((ch & 0xfe) == 0xfc))                        \
744           {                                                                   \
745             /* We expect six bytes.  */                                       \
746             cnt = 6;                                                          \
747             ch &= 0x01;                                                       \
748           }                                                                   \
749         else                                                                  \
750           {                                                                   \
751             /* Search the end of this ill-formed UTF-8 character.  This       \
752                is the next byte with (x & 0xc0) != 0x80.  */                  \
753             i = 0;                                                            \
754             do                                                                \
755               ++i;                                                            \
756             while (inptr + i < inend                                          \
757                    && (*(inptr + i) & 0xc0) == 0x80                           \
758                    && i < 5);                                                 \
759                                                                               \
760           errout:                                                             \
761             STANDARD_FROM_LOOP_ERR_HANDLER (i);                               \
762           }                                                                   \
763                                                                               \
764         if (__glibc_unlikely (inptr + cnt > inend))                           \
765           {                                                                   \
766             /* We don't have enough input.  But before we report that check   \
767                that all the bytes are correct.  */                            \
768             for (i = 1; inptr + i < inend; ++i)                               \
769               if ((inptr[i] & 0xc0) != 0x80)                                  \
770                 break;                                                        \
771                                                                               \
772             if (__glibc_likely (inptr + i == inend))                          \
773               {                                                               \
774                 result = __GCONV_INCOMPLETE_INPUT;                            \
775                 break;                                                        \
776               }                                                               \
777                                                                               \
778             goto errout;                                                      \
779           }                                                                   \
780                                                                               \
781         /* Read the possible remaining bytes.  */                             \
782         for (i = 1; i < cnt; ++i)                                             \
783           {                                                                   \
784             uint32_t byte = inptr[i];                                         \
785                                                                               \
786             if ((byte & 0xc0) != 0x80)                                        \
787               /* This is an illegal encoding.  */                             \
788               break;                                                          \
789                                                                               \
790             ch <<= 6;                                                         \
791             ch |= byte & 0x3f;                                                \
792           }                                                                   \
793                                                                               \
794         /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.               \
795            If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could       \
796            have been represented with fewer than cnt bytes.  */               \
797         if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)                \
798             /* Do not accept UTF-16 surrogates.  */                           \
799             || (ch >= 0xd800 && ch <= 0xdfff))                                \
800           {                                                                   \
801             /* This is an illegal encoding.  */                               \
802             goto errout;                                                      \
803           }                                                                   \
804                                                                               \
805         inptr += cnt;                                                         \
806       }                                                                       \
807                                                                               \
808     /* Now adjust the pointers and store the result.  */                      \
809     *((uint32_t *) outptr) = ch;                                              \
810     outptr += sizeof (uint32_t);                                              \
811   }
812 #define LOOP_NEED_FLAGS
813
/* Park an incomplete trailing UTF-8 sequence in STATE.  The expansion
   context must provide `state', `inptrp' and `inend'.  Afterwards the low
   byte of state->__count holds the number of bytes that were available,
   the bits above it hold the sequence length announced by the lead byte,
   and state->__value.__wch holds the partially decoded character, shifted
   as though the missing trail bytes contributed only zero bits.  */
#define STORE_REST \
  {                                                                           \
    /* Precondition: the lead byte is valid and announces more bytes          \
       than remain in the input buffer.  */                                   \
    wint_t partial = **inptrp;                                                \
    size_t seqlen, missing;                                                   \
                                                                              \
    state->__count = inend - *inptrp;                                         \
                                                                              \
    /* 0xc0 and 0xc1 could only start an overlong two-byte form.  */          \
    assert (partial != 0xc0 && partial != 0xc1);                              \
                                                                              \
    /* Derive the sequence length from the lead byte.  */                     \
    seqlen = ((partial >= 0xc2 && partial < 0xe0) ? 2                         \
              : __glibc_likely ((partial & 0xf0) == 0xe0) ? 3                 \
              : __glibc_likely ((partial & 0xf8) == 0xf0) ? 4                 \
              : __glibc_likely ((partial & 0xfc) == 0xf8) ? 5                 \
              : 6);                                                           \
                                                                              \
    /* The payload mask of the lead byte is 0x1f, 0x0f, 0x07, 0x03 or         \
       0x01 for lengths 2 ... 6, which is exactly 0x7f >> length.  */         \
    partial &= 0x7f >> seqlen;                                                \
                                                                              \
    /* The lead byte is consumed; fold in the trail bytes at hand.  */        \
    missing = seqlen - 1;                                                     \
    for (++*inptrp; *inptrp < inend; ++*inptrp)                               \
      {                                                                       \
        partial = (partial << 6) | (**inptrp & 0x3f);                         \
        --missing;                                                            \
      }                                                                       \
                                                                              \
    /* Make room for the trail bytes that have not arrived yet.  */           \
    partial <<= missing * 6;                                                  \
                                                                              \
    /* Record the announced total length above the available count.  */       \
    state->__count |= seqlen << 8;                                            \
                                                                              \
    state->__value.__wch = partial;                                           \
  }
877
/* Counterpart of STORE_REST: rebuild in `bytebuf' the bytes of the
   partial UTF-8 sequence recorded in `state' and set `inlen' to the number
   of bytes that had been available when it was stored.  */
#define UNPACK_BYTES \
  {                                                                           \
    /* Lead-byte marker bits for sequences of 2 ... 6 bytes.  */              \
    static const unsigned char lead_mark[5] =                                 \
      { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };                                       \
    wint_t bits = state->__value.__wch;                                       \
    size_t pos = state->__count >> 8;                                         \
                                                                              \
    inlen = state->__count & 255;                                             \
    bytebuf[0] = lead_mark[pos - 2];                                          \
                                                                              \
    /* Step from the last position down to position 1, taking six             \
       payload bits per step.  Only positions below INLEN are                 \
       written to the buffer.  */                                             \
    do                                                                        \
      {                                                                       \
        --pos;                                                                \
        if (pos < inlen)                                                      \
          bytebuf[pos] = 0x80 | (bits & 0x3f);                                \
        bits >>= 6;                                                           \
      }                                                                       \
    while (pos > 1);                                                          \
                                                                              \
    /* The remaining bits belong to the lead byte.  */                        \
    bytebuf[0] |= bits;                                                       \
  }
898
/* Reset state->__count, the field in which STORE_REST records the byte
   counts of a pending sequence.  */
#define CLEAR_STATE             state->__count = 0
901
902
903 #include <iconv/loop.c>
904 #include <iconv/skeleton.c>
905
906
/* Convert from UCS2 to the internal (UCS4-like) format.  Each 16-bit
   input unit yields one 32-bit output unit; units in the surrogate range
   0xd800 ... 0xdfff are handed to the standard error handler.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2_internal_loop
#define TO_LOOP                 ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  {                                                                           \
    uint16_t unit = get16 (inptr);                                            \
                                                                              \
    /* All of 0xd800 ... 0xdfff share the five leading bits 11011.  */        \
    if (__glibc_unlikely ((unit & 0xf800) == 0xd800))                         \
      {                                                                       \
        /* UCS-2 input must not contain surrogates; reject them.              \
           (Catching this here is not security relevant.)  */                 \
        STANDARD_FROM_LOOP_ERR_HANDLER (2);                                   \
      }                                                                       \
                                                                              \
    inptr += 2;                                                               \
    *((uint32_t *) outptr) = unit;                                            \
    outptr += sizeof (uint32_t);                                              \
  }
#define LOOP_NEED_FLAGS
937 #include <iconv/loop.c>
938 #include <iconv/skeleton.c>
939
940
/* Convert from the internal (UCS4-like) format to UCS2.  Each 32-bit
   input unit yields one 16-bit output unit; values of 0x10000 and above
   and values in the surrogate range take the error paths below.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2_loop
#define TO_LOOP                 internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs2
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  {                                                                           \
    uint32_t ucs4 = *((const uint32_t *) inptr);                              \
                                                                              \
    if (__glibc_unlikely (ucs4 >= 0x10000))                                   \
      {                                                                       \
        /* The value does not fit into one 16-bit unit.  */                   \
        UNICODE_TAG_HANDLER (ucs4, 4);                                        \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else if (__glibc_unlikely ((ucs4 & 0xf800) == 0xd800))                    \
      {                                                                       \
        /* Below 0x10000 the surrogates 0xd800 ... 0xdfff are exactly         \
           the values whose bits 11-15 read 11011.  They are not              \
           valid in UCS-4 input and must be caught here: the UCS-2            \
           output might be interpreted as UTF-16 by other programs,           \
           so letting surrogates through would allow an attacker to           \
           synthesize any desired plane 1-16 character.  */                   \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        if (ignore_errors_p ())                                               \
          {                                                                   \
            /* Skip the character and count it as irreversible.  */           \
            inptr += 4;                                                       \
            ++*irreversible;                                                  \
            continue;                                                         \
          }                                                                   \
        break;                                                                \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        put16 (outptr, ucs4);                                                 \
        outptr += sizeof (uint16_t);                                          \
        inptr += 4;                                                           \
      }                                                                       \
  }
#define LOOP_NEED_FLAGS
987 #include <iconv/loop.c>
988 #include <iconv/skeleton.c>
989
990
/* Convert from UCS2 in other endianness to the internal (UCS4-like)
   format.  Each 16-bit input unit is byte-swapped and yields one 32-bit
   output unit; units in the surrogate range 0xd800 ... 0xdfff are handed
   to the standard error handler.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         2
#define MIN_NEEDED_TO           4
#define FROM_DIRECTION          1
#define FROM_LOOP               ucs2reverse_internal_loop
#define TO_LOOP                 ucs2reverse_internal_loop /* Not used.  */
#define FUNCTION_NAME           __gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  {                                                                           \
    /* Swap the bytes of the input unit before examining it.  */              \
    uint16_t u1 = bswap_16 (get16 (inptr));                                   \
                                                                              \
    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))                       \
      {                                                                       \
        /* Surrogate characters in UCS-2 input are not valid.  Reject         \
           them.  (Catching this here is not security relevant.)  Hand        \
           the error to the same handler the native-endian UCS-2              \
           converter uses, so that byte order alone never changes how         \
           an invalid unit is reported.  */                                   \
        STANDARD_FROM_LOOP_ERR_HANDLER (2);                                   \
      }                                                                       \
                                                                              \
    *((uint32_t *) outptr) = u1;                                              \
    outptr += sizeof (uint32_t);                                              \
    inptr += 2;                                                               \
  }
#define LOOP_NEED_FLAGS
1028 #include <iconv/loop.c>
1029 #include <iconv/skeleton.c>
1030
1031
/* Convert from the internal (UCS4-like) format to UCS2 in other
   endianness.  Each 32-bit input unit yields one byte-swapped 16-bit
   output unit; values of 0x10000 and above and values in the surrogate
   range take the error paths below.  */
#define DEFINE_INIT             0
#define DEFINE_FINI             0
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           2
#define FROM_DIRECTION          1
#define FROM_LOOP               internal_ucs2reverse_loop
#define TO_LOOP                 internal_ucs2reverse_loop /* Not used.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION           1

#define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
#define LOOPFCT                 FROM_LOOP
#define BODY \
  {                                                                           \
    uint32_t val = *((const uint32_t *) inptr);                               \
                                                                              \
    if (__glibc_unlikely (val >= 0x10000))                                    \
      {                                                                       \
        /* The value does not fit into one 16-bit unit.  */                   \
        UNICODE_TAG_HANDLER (val, 4);                                         \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                                     \
      }                                                                       \
    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000))                \
      {                                                                       \
        /* Surrogate characters in UCS-4 input are not valid.                 \
           We must catch this, because the UCS-2 output might be              \
           interpreted as UTF-16 by other programs.  If we let                \
           surrogates pass through, attackers could make a security           \
           hole exploit by synthesizing any desired plane 1-16                \
           character.  */                                                     \
        /* Record the error before testing the ignore flag, as the            \
           native-endian internal-to-UCS2 converter does, so that             \
           RESULT keeps the error code even if the character is               \
           skipped.  */                                                       \
        result = __GCONV_ILLEGAL_INPUT;                                       \
        if (! ignore_errors_p ())                                             \
          break;                                                              \
        inptr += 4;                                                           \
        ++*irreversible;                                                      \
        continue;                                                             \
      }                                                                       \
    else                                                                      \
      {                                                                       \
        put16 (outptr, bswap_16 (val));                                       \
        outptr += sizeof (uint16_t);                                          \
        inptr += 4;                                                           \
      }                                                                       \
  }
#define LOOP_NEED_FLAGS
1079 #include <iconv/loop.c>
1080 #include <iconv/skeleton.c>