Imported Upstream version 0.9.4
[platform/upstream/libunistring.git] / lib / unistr / u8-mbtouc-unsafe.c
1 /* Look at first character in UTF-8 string.
2    Copyright (C) 1999-2002, 2006-2007, 2009-2014 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5    This program is free software: you can redistribute it and/or modify it
6    under the terms of the GNU Lesser General Public License as published
7    by the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18 #include <config.h>
19
20 #if defined IN_LIBUNISTRING
21 /* Tell unistr.h to declare u8_mbtouc_unsafe as 'extern', not
22    'static inline'.  */
23 # include "unistring-notinline.h"
24 #endif
25
26 /* Specification.  */
27 #include "unistr.h"
28
29 #if !HAVE_INLINE
30
31 int
32 u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
33 {
34   uint8_t c = *s;
35
36   if (c < 0x80)
37     {
38       *puc = c;
39       return 1;
40     }
41   else if (c >= 0xc2)
42     {
43       if (c < 0xe0)
44         {
45           if (n >= 2)
46             {
47 #if CONFIG_UNICODE_SAFETY
48               if ((s[1] ^ 0x80) < 0x40)
49 #endif
50                 {
51                   *puc = ((unsigned int) (c & 0x1f) << 6)
52                          | (unsigned int) (s[1] ^ 0x80);
53                   return 2;
54                 }
55 #if CONFIG_UNICODE_SAFETY
56               /* invalid multibyte character */
57 #endif
58             }
59           else
60             {
61               /* incomplete multibyte character */
62               *puc = 0xfffd;
63               return 1;
64             }
65         }
66       else if (c < 0xf0)
67         {
68           if (n >= 3)
69             {
70 #if CONFIG_UNICODE_SAFETY
71               if ((s[1] ^ 0x80) < 0x40)
72                 {
73                   if ((s[2] ^ 0x80) < 0x40)
74                     {
75                       if ((c >= 0xe1 || s[1] >= 0xa0)
76                           && (c != 0xed || s[1] < 0xa0))
77 #endif
78                         {
79                           *puc = ((unsigned int) (c & 0x0f) << 12)
80                                  | ((unsigned int) (s[1] ^ 0x80) << 6)
81                                  | (unsigned int) (s[2] ^ 0x80);
82                           return 3;
83                         }
84 #if CONFIG_UNICODE_SAFETY
85                       /* invalid multibyte character */
86                       *puc = 0xfffd;
87                       return 3;
88                     }
89                   /* invalid multibyte character */
90                   *puc = 0xfffd;
91                   return 2;
92                 }
93               /* invalid multibyte character */
94 #endif
95             }
96           else
97             {
98               /* incomplete multibyte character */
99               *puc = 0xfffd;
100               if (n == 1 || (s[1] ^ 0x80) >= 0x40)
101                 return 1;
102               else
103                 return 2;
104             }
105         }
106       else if (c < 0xf8)
107         {
108           if (n >= 4)
109             {
110 #if CONFIG_UNICODE_SAFETY
111               if ((s[1] ^ 0x80) < 0x40)
112                 {
113                   if ((s[2] ^ 0x80) < 0x40)
114                     {
115                       if ((s[3] ^ 0x80) < 0x40)
116                         {
117                           if ((c >= 0xf1 || s[1] >= 0x90)
118 #if 1
119                               && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
120 #endif
121                              )
122 #endif
123                             {
124                               *puc = ((unsigned int) (c & 0x07) << 18)
125                                      | ((unsigned int) (s[1] ^ 0x80) << 12)
126                                      | ((unsigned int) (s[2] ^ 0x80) << 6)
127                                      | (unsigned int) (s[3] ^ 0x80);
128                               return 4;
129                             }
130 #if CONFIG_UNICODE_SAFETY
131                           /* invalid multibyte character */
132                           *puc = 0xfffd;
133                           return 4;
134                         }
135                       /* invalid multibyte character */
136                       *puc = 0xfffd;
137                       return 3;
138                     }
139                   /* invalid multibyte character */
140                   *puc = 0xfffd;
141                   return 2;
142                 }
143               /* invalid multibyte character */
144 #endif
145             }
146           else
147             {
148               /* incomplete multibyte character */
149               *puc = 0xfffd;
150               if (n == 1 || (s[1] ^ 0x80) >= 0x40)
151                 return 1;
152               else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
153                 return 2;
154               else
155                 return 3;
156             }
157         }
158 #if 0
159       else if (c < 0xfc)
160         {
161           if (n >= 5)
162             {
163 #if CONFIG_UNICODE_SAFETY
164               if ((s[1] ^ 0x80) < 0x40)
165                 {
166                   if ((s[2] ^ 0x80) < 0x40)
167                     {
168                       if ((s[3] ^ 0x80) < 0x40)
169                         {
170                           if ((s[4] ^ 0x80) < 0x40)
171                             {
172                               if (c >= 0xf9 || s[1] >= 0x88)
173 #endif
174                                 {
175                                   *puc = ((unsigned int) (c & 0x03) << 24)
176                                          | ((unsigned int) (s[1] ^ 0x80) << 18)
177                                          | ((unsigned int) (s[2] ^ 0x80) << 12)
178                                          | ((unsigned int) (s[3] ^ 0x80) << 6)
179                                          | (unsigned int) (s[4] ^ 0x80);
180                                   return 5;
181                                 }
182 #if CONFIG_UNICODE_SAFETY
183                               /* invalid multibyte character */
184                               *puc = 0xfffd;
185                               return 5;
186                             }
187                           /* invalid multibyte character */
188                           *puc = 0xfffd;
189                           return 4;
190                         }
191                       /* invalid multibyte character */
192                       *puc = 0xfffd;
193                       return 3;
194                     }
195                   /* invalid multibyte character */
196                   return 2;
197                 }
198               /* invalid multibyte character */
199 #endif
200             }
201           else
202             {
203               /* incomplete multibyte character */
204               *puc = 0xfffd;
205               return n;
206             }
207         }
208       else if (c < 0xfe)
209         {
210           if (n >= 6)
211             {
212 #if CONFIG_UNICODE_SAFETY
213               if ((s[1] ^ 0x80) < 0x40)
214                 {
215                   if ((s[2] ^ 0x80) < 0x40)
216                     {
217                       if ((s[3] ^ 0x80) < 0x40)
218                         {
219                           if ((s[4] ^ 0x80) < 0x40)
220                             {
221                               if ((s[5] ^ 0x80) < 0x40)
222                                 {
223                                   if (c >= 0xfd || s[1] >= 0x84)
224 #endif
225                                     {
226                                       *puc = ((unsigned int) (c & 0x01) << 30)
227                                              | ((unsigned int) (s[1] ^ 0x80) << 24)
228                                              | ((unsigned int) (s[2] ^ 0x80) << 18)
229                                              | ((unsigned int) (s[3] ^ 0x80) << 12)
230                                              | ((unsigned int) (s[4] ^ 0x80) << 6)
231                                              | (unsigned int) (s[5] ^ 0x80);
232                                       return 6;
233                                     }
234 #if CONFIG_UNICODE_SAFETY
235                                   /* invalid multibyte character */
236                                   *puc = 0xfffd;
237                                   return 6;
238                                 }
239                               /* invalid multibyte character */
240                               *puc = 0xfffd;
241                               return 5;
242                             }
243                           /* invalid multibyte character */
244                           *puc = 0xfffd;
245                           return 4;
246                         }
247                       /* invalid multibyte character */
248                       *puc = 0xfffd;
249                       return 3;
250                     }
251                   /* invalid multibyte character */
252                   return 2;
253                 }
254               /* invalid multibyte character */
255 #endif
256             }
257           else
258             {
259               /* incomplete multibyte character */
260               *puc = 0xfffd;
261               return n;
262             }
263         }
264 #endif
265     }
266   /* invalid multibyte character */
267   *puc = 0xfffd;
268   return 1;
269 }
270
271 #endif