Imported Upstream version 1.4.19
[platform/upstream/m4.git] / lib / unistr / u8-mbtoucr.c
1 /* Look at first character in UTF-8 string, returning an error code.
2    Copyright (C) 1999-2002, 2006-2007, 2009-2021 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5    This program is free software: you can redistribute it and/or modify it
6    under the terms of the GNU General Public License as published
7    by the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17
18 #include <config.h>
19
20 /* Specification.  */
21 #include "unistr.h"
22
/* Decode the first character of the UTF-8 sequence S, of which N bytes are
   available, and store the result in *PUC.

   Return value:
     1..4  The number of bytes occupied by a well-formed character; *PUC
           holds its code point.
     -1    The available bytes cannot begin a well-formed character: invalid
           lead byte, invalid continuation byte, overlong encoding, encoded
           surrogate, or value above U+10FFFF.  *PUC is set to U+FFFD.
     -2    The N available bytes are a proper prefix of a well-formed
           character, i.e. more input could complete it.  *PUC is set to
           U+FFFD.

   S[0] is read unconditionally, so the caller must pass N >= 1.  */
int
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t c = *s;

  if (c < 0x80)
    {
      /* Single-byte (ASCII) character.  */
      *puc = c;
      return 1;
    }

  /* Only 0xC2..0xF4 can start a well-formed multibyte character:
     0x80..0xBF are continuation bytes, 0xC0..0xC1 would start an overlong
     two-byte form, and 0xF5..0xFF would encode values above U+10FFFF.
     Such a byte is invalid no matter how many bytes follow, so it must
     never be reported as merely incomplete.  */
  if (c < 0xc2 || c > 0xf4)
    goto invalid;

  if (n < 2)
    goto incomplete;

  /* Every continuation byte must be in 0x80..0xBF.  */
  if ((s[1] ^ 0x80) >= 0x40)
    goto invalid;

  if (c < 0xe0)
    {
      /* Two-byte character: 0xC2..0xDF 0x80..0xBF.  */
      *puc = ((unsigned int) (c & 0x1f) << 6)
             | (unsigned int) (s[1] ^ 0x80);
      return 2;
    }

  if (c < 0xf0)
    {
      /* Three-byte character.  Reject overlong forms (0xE0 0x80..0x9F)
         and encoded surrogates (0xED 0xA0..0xBF) as soon as the second
         byte is known.  */
      if ((c == 0xe0 && s[1] < 0xa0) || (c == 0xed && s[1] >= 0xa0))
        goto invalid;

      if (n < 3)
        goto incomplete;
      if ((s[2] ^ 0x80) >= 0x40)
        goto invalid;

      *puc = ((unsigned int) (c & 0x0f) << 12)
             | ((unsigned int) (s[1] ^ 0x80) << 6)
             | (unsigned int) (s[2] ^ 0x80);
      return 3;
    }

  /* Four-byte character, lead byte 0xF0..0xF4.  Reject overlong forms
     (0xF0 0x80..0x8F) and values above U+10FFFF (0xF4 0x90..0xBF).  */
  if ((c == 0xf0 && s[1] < 0x90) || (c == 0xf4 && s[1] >= 0x90))
    goto invalid;

  if (n < 3)
    goto incomplete;
  if ((s[2] ^ 0x80) >= 0x40)
    goto invalid;

  if (n < 4)
    goto incomplete;
  if ((s[3] ^ 0x80) >= 0x40)
    goto invalid;

  *puc = ((unsigned int) (c & 0x07) << 18)
         | ((unsigned int) (s[1] ^ 0x80) << 12)
         | ((unsigned int) (s[2] ^ 0x80) << 6)
         | (unsigned int) (s[3] ^ 0x80);
  return 4;

 incomplete:
  /* incomplete multibyte character */
  *puc = 0xfffd;
  return -2;

 invalid:
  /* invalid multibyte character */
  *puc = 0xfffd;
  return -1;
}