1 /* Test of canonical normalization of UTF-8 strings.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
21 #if GNULIB_TEST_UNINORM_U8_NORMALIZE
/* Normalize INPUT (INPUT_LENGTH units) to NFC with u8_normalize and compare
   the outcome against EXPECTED (EXPECTED_LENGTH units).  The call is made
   up to three times: with no result buffer, with a result buffer one unit
   too small (only when EXPECTED_LENGTH > 0), and with a result buffer of
   exactly EXPECTED_LENGTH units.
   NOTE(review): the return statements are not visible in this excerpt;
   callers assert that the value is 0, so 0 presumably means that every
   check passed -- confirm against the full file.  */
33 check (const uint8_t *input, size_t input_length,
34 const uint8_t *expected, size_t expected_length)
39 /* Test return conventions with resultbuf == NULL. */
/* With no buffer supplied, the result must be non-NULL and must match
   EXPECTED in both length and contents.  */
40 result = u8_normalize (UNINORM_NFC, input, input_length, NULL, &length);
41 if (!(result != NULL))
43 if (!(length == expected_length))
45 if (!(u8_cmp (result, expected, expected_length) == 0))
49 /* Test return conventions with resultbuf too small. */
/* Skipped for an empty expected result, since a buffer cannot be made
   one unit shorter than zero.  */
50 if (expected_length > 0)
52 uint8_t *preallocated;
/* LENGTH is passed in as the capacity of PREALLOCATED and is compared
   with EXPECTED_LENGTH after the call.  */
54 length = expected_length - 1;
55 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
56 result = u8_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
57 if (!(result != NULL))
/* The supplied buffer is too small for the result, so the returned
   pointer is required to differ from PREALLOCATED.  */
59 if (!(result != preallocated))
61 if (!(length == expected_length))
63 if (!(u8_cmp (result, expected, expected_length) == 0))
69 /* Test return conventions with resultbuf large enough. */
71 uint8_t *preallocated;
73 length = expected_length;
74 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
75 result = u8_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
76 if (!(result != NULL))
/* The result is required to be PREALLOCATED itself, unless PREALLOCATED
   is NULL.  NOTE(review): the NULL case presumably covers malloc
   returning NULL, e.g. for a zero-length request -- confirm.  */
78 if (!(preallocated == NULL || result == preallocated))
80 if (!(length == expected_length))
82 if (!(u8_cmp (result, expected, expected_length) == 0))
94 ASSERT (check (NULL, 0, NULL, 0) == 0);
97 static const uint8_t input[] = { 0x20 };
98 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
101 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
102 static const uint8_t input[] = { 0xC3, 0x84 };
103 static const uint8_t decomposed[] = { 0x41, 0xCC, 0x88 };
104 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
105 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
108 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
109 static const uint8_t input[] = { 0xC7, 0x9E };
110 static const uint8_t decomposed[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
111 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
112 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
115 { /* ANGSTROM SIGN */
116 static const uint8_t input[] = { 0xE2, 0x84, 0xAB };
117 static const uint8_t decomposed[] = { 0x41, 0xCC, 0x8A };
118 static const uint8_t expected[] = { 0xC3, 0x85 };
119 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
120 ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0);
121 ASSERT (check (expected, SIZEOF (expected), expected, SIZEOF (expected)) == 0);
124 { /* GREEK DIALYTIKA AND PERISPOMENI */
125 static const uint8_t input[] = { 0xE1, 0xBF, 0x81 };
126 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
129 { /* SCRIPT SMALL L */
130 static const uint8_t input[] = { 0xE2, 0x84, 0x93 };
131 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
134 { /* NO-BREAK SPACE */
135 static const uint8_t input[] = { 0xC2, 0xA0 };
136 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
139 { /* ARABIC LETTER VEH INITIAL FORM */
140 static const uint8_t input[] = { 0xEF, 0xAD, 0xAC };
141 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
144 { /* ARABIC LETTER VEH MEDIAL FORM */
145 static const uint8_t input[] = { 0xEF, 0xAD, 0xAD };
146 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
149 { /* ARABIC LETTER VEH FINAL FORM */
150 static const uint8_t input[] = { 0xEF, 0xAD, 0xAB };
151 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
154 { /* ARABIC LETTER VEH ISOLATED FORM */
155 static const uint8_t input[] = { 0xEF, 0xAD, 0xAA };
156 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
159 { /* CIRCLED NUMBER FIFTEEN */
160 static const uint8_t input[] = { 0xE2, 0x91, 0xAE };
161 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
164 { /* TRADE MARK SIGN */
165 static const uint8_t input[] = { 0xE2, 0x84, 0xA2 };
166 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
169 { /* LATIN SUBSCRIPT SMALL LETTER I */
170 static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 };
171 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
174 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
175 static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 };
176 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
179 { /* FULLWIDTH LATIN CAPITAL LETTER A */
180 static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 };
181 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
184 { /* HALFWIDTH IDEOGRAPHIC COMMA */
185 static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 };
186 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
189 { /* SMALL IDEOGRAPHIC COMMA */
190 static const uint8_t input[] = { 0xEF, 0xB9, 0x91 };
191 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
195 static const uint8_t input[] = { 0xE3, 0x8E, 0x92 };
196 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
199 { /* VULGAR FRACTION THREE EIGHTHS */
200 static const uint8_t input[] = { 0xE2, 0x85, 0x9C };
201 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
205 static const uint8_t input[] = { 0xC2, 0xB5 };
206 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
209 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
210 static const uint8_t input[] = { 0xEF, 0xB7, 0xBA };
211 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
214 { /* HANGUL SYLLABLE GEUL */
215 static const uint8_t input[] = { 0xEA, 0xB8, 0x80 };
216 static const uint8_t decomposed[] =
217 { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
218 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
219 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
222 { /* HANGUL SYLLABLE GEU */
223 static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 };
224 static const uint8_t decomposed[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
225 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
226 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
229 { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
230 static const uint8_t input[] =
231 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
232 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
233 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
234 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
235 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
236 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
237 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
239 0xEA, 0xB8, 0x80, '\n'
241 static const uint8_t decomposed[] =
242 { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
243 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
244 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
245 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
246 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
247 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
248 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
249 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
250 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
252 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
253 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
257 /* Declare failure if test takes too long, by using default abort
258 caused by SIGALRM. */
259 signal (SIGALRM, SIG_DFL);
263 /* Check that the sorting is not O(n²) but O(n log n). */
266 for (pass = 0; pass < 3; pass++)
270 uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
273 uint8_t *expected = input + (2 * m - 1);
275 size_t m2 = (m - 1) / 2;
276 /* NB: m1 + m2 == m - 1. */
285 for (i = 0; i < m1; i++)
290 for (i = 0; i < m2; i++)
298 for (i = 0; i < m2; i++)
303 for (i = 0; i < m1; i++)
311 for (i = 0; i < m2; i++)
332 for (i = 0; i < m1; i++)
337 for (i = 0; i < m2 - 1; i++)
343 for (; repeat > 0; repeat--)
345 ASSERT (check (input, 2 * m - 1, expected, 2 * m - 2) == 0);
346 ASSERT (check (expected, 2 * m - 2, expected, 2 * m - 2) == 0);