Upstream version 7.36.149.0
[platform/framework/web/crosswalk.git] / src / base / strings / utf_offset_string_conversions_unittest.cc
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6
7 #include "base/logging.h"
8 #include "base/strings/string_piece.h"
9 #include "base/strings/utf_offset_string_conversions.h"
10 #include "testing/gtest/include/gtest/gtest.h"
11
12 namespace base {
13
14 namespace {
15
16 static const size_t kNpos = string16::npos;
17
18 }  // namespace
19
20 TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
21   struct UTF8ToUTF16Case {
22     const char* utf8;
23     size_t input_offset;
24     size_t output_offset;
25   } utf8_to_utf16_cases[] = {
26     {"", 0, 0},
27     {"", kNpos, kNpos},
28     {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos},
29     {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
30     {"\xed\xb0\x80z", 3, 1},
31     {"A\xF0\x90\x8C\x80z", 1, 1},
32     {"A\xF0\x90\x8C\x80z", 2, kNpos},
33     {"A\xF0\x90\x8C\x80z", 5, 3},
34     {"A\xF0\x90\x8C\x80z", 6, 4},
35     {"A\xF0\x90\x8C\x80z", kNpos, kNpos},
36   };
37   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_utf16_cases); ++i) {
38     const size_t offset = utf8_to_utf16_cases[i].input_offset;
39     std::vector<size_t> offsets;
40     offsets.push_back(offset);
41     UTF8ToUTF16AndAdjustOffsets(utf8_to_utf16_cases[i].utf8, &offsets);
42     EXPECT_EQ(utf8_to_utf16_cases[i].output_offset, offsets[0]);
43   }
44
45   struct UTF16ToUTF8Case {
46     char16 utf16[10];
47     size_t input_offset;
48     size_t output_offset;
49   } utf16_to_utf8_cases[] = {
50       {{}, 0, 0},
51       // Converted to 3-byte utf-8 sequences
52       {{0x5909, 0x63DB}, 3, kNpos},
53       {{0x5909, 0x63DB}, 2, 6},
54       {{0x5909, 0x63DB}, 1, 3},
55       {{0x5909, 0x63DB}, 0, 0},
56       // Converted to 2-byte utf-8 sequences
57       {{'A', 0x00bc, 0x00be, 'z'}, 1, 1},
58       {{'A', 0x00bc, 0x00be, 'z'}, 2, 3},
59       {{'A', 0x00bc, 0x00be, 'z'}, 3, 5},
60       {{'A', 0x00bc, 0x00be, 'z'}, 4, 6},
61       // Surrogate pair
62       {{'A', 0xd800, 0xdf00, 'z'}, 1, 1},
63       {{'A', 0xd800, 0xdf00, 'z'}, 2, kNpos},
64       {{'A', 0xd800, 0xdf00, 'z'}, 3, 5},
65       {{'A', 0xd800, 0xdf00, 'z'}, 4, 6},
66   };
67   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_utf8_cases); ++i) {
68     size_t offset = utf16_to_utf8_cases[i].input_offset;
69     std::vector<size_t> offsets;
70     offsets.push_back(offset);
71     UTF16ToUTF8AndAdjustOffsets(utf16_to_utf8_cases[i].utf16, &offsets);
72     EXPECT_EQ(utf16_to_utf8_cases[i].output_offset, offsets[0]) << i;
73   }
74 }
75
76 TEST(UTFOffsetStringConversionsTest, LimitOffsets) {
77   const size_t kLimit = 10;
78   const size_t kItems = 20;
79   std::vector<size_t> size_ts;
80   for (size_t t = 0; t < kItems; ++t)
81     size_ts.push_back(t);
82   std::for_each(size_ts.begin(), size_ts.end(),
83                 LimitOffset<string16>(kLimit));
84   size_t unlimited_count = 0;
85   for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
86        ++ti) {
87     if (*ti != kNpos)
88       ++unlimited_count;
89   }
90   EXPECT_EQ(11U, unlimited_count);
91
92   // Reverse the values in the vector and try again.
93   size_ts.clear();
94   for (size_t t = kItems; t > 0; --t)
95     size_ts.push_back(t - 1);
96   std::for_each(size_ts.begin(), size_ts.end(),
97                 LimitOffset<string16>(kLimit));
98   unlimited_count = 0;
99   for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
100        ++ti) {
101     if (*ti != kNpos)
102       ++unlimited_count;
103   }
104   EXPECT_EQ(11U, unlimited_count);
105 }
106
107 TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
108   // Imagine we have strings as shown in the following cases where the
109   // X's represent encoded characters.
110   // 1: abcXXXdef ==> abcXdef
111   {
112     std::vector<size_t> offsets;
113     for (size_t t = 0; t <= 9; ++t)
114       offsets.push_back(t);
115     OffsetAdjuster::Adjustments adjustments;
116     adjustments.push_back(OffsetAdjuster::Adjustment(3, 3, 1));
117     OffsetAdjuster::AdjustOffsets(adjustments, &offsets);
118     size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6, 7};
119     EXPECT_EQ(offsets.size(), arraysize(expected_1));
120     for (size_t i = 0; i < arraysize(expected_1); ++i)
121       EXPECT_EQ(expected_1[i], offsets[i]);
122   }
123
124   // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX
125   {
126     std::vector<size_t> offsets;
127     for (size_t t = 0; t <= 23; ++t)
128       offsets.push_back(t);
129     OffsetAdjuster::Adjustments adjustments;
130     adjustments.push_back(OffsetAdjuster::Adjustment(0, 3, 1));
131     adjustments.push_back(OffsetAdjuster::Adjustment(4, 4, 2));
132     adjustments.push_back(OffsetAdjuster::Adjustment(10, 7, 4));
133     adjustments.push_back(OffsetAdjuster::Adjustment(20, 3, 1));
134     OffsetAdjuster::AdjustOffsets(adjustments, &offsets);
135     size_t expected_2[] = {
136       0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6, kNpos, kNpos, kNpos,
137       kNpos, kNpos, kNpos, 10, 11, 12, 13, kNpos, kNpos, 14
138     };
139     EXPECT_EQ(offsets.size(), arraysize(expected_2));
140     for (size_t i = 0; i < arraysize(expected_2); ++i)
141       EXPECT_EQ(expected_2[i], offsets[i]);
142   }
143
144   // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe
145   {
146     std::vector<size_t> offsets;
147     for (size_t t = 0; t <= 17; ++t)
148       offsets.push_back(t);
149     OffsetAdjuster::Adjustments adjustments;
150     adjustments.push_back(OffsetAdjuster::Adjustment(0, 3, 0));
151     adjustments.push_back(OffsetAdjuster::Adjustment(4, 4, 4));
152     adjustments.push_back(OffsetAdjuster::Adjustment(11, 3, 3));
153     adjustments.push_back(OffsetAdjuster::Adjustment(15, 2, 0));
154     OffsetAdjuster::AdjustOffsets(adjustments, &offsets);
155     size_t expected_3[] = {
156       0, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6, 7, 8, kNpos, kNpos, 11,
157       12, kNpos, 12
158     };
159     EXPECT_EQ(offsets.size(), arraysize(expected_3));
160     for (size_t i = 0; i < arraysize(expected_3); ++i)
161       EXPECT_EQ(expected_3[i], offsets[i]);
162   }
163 }
164
165 TEST(UTFOffsetStringConversionsTest, UnadjustOffsets) {
166   // Imagine we have strings as shown in the following cases where the
167   // X's represent encoded characters.
168   // 1: abcXXXdef ==> abcXdef
169   {
170     std::vector<size_t> offsets;
171     for (size_t t = 0; t <= 7; ++t)
172       offsets.push_back(t);
173     OffsetAdjuster::Adjustments adjustments;
174     adjustments.push_back(OffsetAdjuster::Adjustment(3, 3, 1));
175     OffsetAdjuster::UnadjustOffsets(adjustments, &offsets);
176     size_t expected_1[] = {0, 1, 2, 3, 6, 7, 8, 9};
177     EXPECT_EQ(offsets.size(), arraysize(expected_1));
178     for (size_t i = 0; i < arraysize(expected_1); ++i)
179       EXPECT_EQ(expected_1[i], offsets[i]);
180   }
181
182   // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX
183   {
184     std::vector<size_t> offsets;
185     for (size_t t = 0; t <= 14; ++t)
186       offsets.push_back(t);
187     OffsetAdjuster::Adjustments adjustments;
188     adjustments.push_back(OffsetAdjuster::Adjustment(0, 3, 1));
189     adjustments.push_back(OffsetAdjuster::Adjustment(4, 4, 2));
190     adjustments.push_back(OffsetAdjuster::Adjustment(10, 7, 4));
191     adjustments.push_back(OffsetAdjuster::Adjustment(20, 3, 1));
192     OffsetAdjuster::UnadjustOffsets(adjustments, &offsets);
193     size_t expected_2[] = {
194       0, 3, 4, kNpos, 8, 9, 10, kNpos, kNpos, kNpos, 17, 18, 19, 20, 23
195     };
196     EXPECT_EQ(offsets.size(), arraysize(expected_2));
197     for (size_t i = 0; i < arraysize(expected_2); ++i)
198       EXPECT_EQ(expected_2[i], offsets[i]);
199   }
200
201   // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe
202   {
203     std::vector<size_t> offsets;
204     for (size_t t = 0; t <= 12; ++t)
205       offsets.push_back(t);
206     OffsetAdjuster::Adjustments adjustments;
207     adjustments.push_back(OffsetAdjuster::Adjustment(0, 3, 0));
208     adjustments.push_back(OffsetAdjuster::Adjustment(4, 4, 4));
209     adjustments.push_back(OffsetAdjuster::Adjustment(11, 3, 3));
210     adjustments.push_back(OffsetAdjuster::Adjustment(15, 2, 0));
211     OffsetAdjuster::UnadjustOffsets(adjustments, &offsets);
212     size_t expected_3[] = {
213       0,  // this could just as easily be 3
214       4, kNpos, kNpos, kNpos, 8, 9, 10, 11, kNpos, kNpos, 14,
215       15  // this could just as easily be 17
216     };
217     EXPECT_EQ(offsets.size(), arraysize(expected_3));
218     for (size_t i = 0; i < arraysize(expected_3); ++i)
219       EXPECT_EQ(expected_3[i], offsets[i]);
220   }
221 }
222
223 // MergeSequentialAdjustments is used by net/base/escape.{h,cc} and
224 // net/base/net_util.{h,cc}.  The two tests EscapeTest.AdjustOffset and
225 // NetUtilTest.FormatUrlWithOffsets test its behavior extensively.  This
226 // is simply a short, additional test.
227 TEST(UTFOffsetStringConversionsTest, MergeSequentialAdjustments) {
228   // Pretend the input string is "abcdefghijklmnopqrstuvwxyz".
229
230   // Set up |first_adjustments| to
231   // - remove the leading "a"
232   // - combine the "bc" into one character (call it ".")
233   // - remove the "f"
234   // - remove the "tuv"
235   // The resulting string should be ".deghijklmnopqrswxyz".
236   OffsetAdjuster::Adjustments first_adjustments;
237   first_adjustments.push_back(OffsetAdjuster::Adjustment(0, 1, 0));
238   first_adjustments.push_back(OffsetAdjuster::Adjustment(1, 2, 1));
239   first_adjustments.push_back(OffsetAdjuster::Adjustment(5, 1, 0));
240   first_adjustments.push_back(OffsetAdjuster::Adjustment(19, 3, 0));
241
242   // Set up |adjustments_on_adjusted_string| to
243   // - combine the "." character that replaced "bc" with "d" into one character
244   //   (call it "?")
245   // - remove the "egh"
246   // - expand the "i" into two characters (call them "12")
247   // - combine the "jkl" into one character (call it "@")
248   // - expand the "z" into two characters (call it "34")
249   // The resulting string should be "?12@mnopqrswxy34".
250   OffsetAdjuster::Adjustments adjustments_on_adjusted_string;
251   adjustments_on_adjusted_string.push_back(OffsetAdjuster::Adjustment(
252       0, 2, 1));
253   adjustments_on_adjusted_string.push_back(OffsetAdjuster::Adjustment(
254       2, 3, 0));
255   adjustments_on_adjusted_string.push_back(OffsetAdjuster::Adjustment(
256       5, 1, 2));
257   adjustments_on_adjusted_string.push_back(OffsetAdjuster::Adjustment(
258       6, 3, 1));
259   adjustments_on_adjusted_string.push_back(OffsetAdjuster::Adjustment(
260       19, 1, 2));
261
262   // Now merge the adjustments and check the results.
263   OffsetAdjuster::MergeSequentialAdjustments(first_adjustments,
264                                              &adjustments_on_adjusted_string);
265   // The merged adjustments should look like
266   // - combine abcd into "?"
267   //   - note: it's also reasonable for the Merge function to instead produce
268   //     two adjustments instead of this, one to remove a and another to
269   //     combine bcd into "?".  This test verifies the current behavior.
270   // - remove efgh
271   // - expand i into "12"
272   // - combine jkl into "@"
273   // - remove tuv
274   // - expand z into "34"
275   ASSERT_EQ(6u, adjustments_on_adjusted_string.size());
276   EXPECT_EQ(0u, adjustments_on_adjusted_string[0].original_offset);
277   EXPECT_EQ(4u, adjustments_on_adjusted_string[0].original_length);
278   EXPECT_EQ(1u, adjustments_on_adjusted_string[0].output_length);
279   EXPECT_EQ(4u, adjustments_on_adjusted_string[1].original_offset);
280   EXPECT_EQ(4u, adjustments_on_adjusted_string[1].original_length);
281   EXPECT_EQ(0u, adjustments_on_adjusted_string[1].output_length);
282   EXPECT_EQ(8u, adjustments_on_adjusted_string[2].original_offset);
283   EXPECT_EQ(1u, adjustments_on_adjusted_string[2].original_length);
284   EXPECT_EQ(2u, adjustments_on_adjusted_string[2].output_length);
285   EXPECT_EQ(9u, adjustments_on_adjusted_string[3].original_offset);
286   EXPECT_EQ(3u, adjustments_on_adjusted_string[3].original_length);
287   EXPECT_EQ(1u, adjustments_on_adjusted_string[3].output_length);
288   EXPECT_EQ(19u, adjustments_on_adjusted_string[4].original_offset);
289   EXPECT_EQ(3u, adjustments_on_adjusted_string[4].original_length);
290   EXPECT_EQ(0u, adjustments_on_adjusted_string[4].output_length);
291   EXPECT_EQ(25u, adjustments_on_adjusted_string[5].original_offset);
292   EXPECT_EQ(1u, adjustments_on_adjusted_string[5].original_length);
293   EXPECT_EQ(2u, adjustments_on_adjusted_string[5].output_length);
294 }
295
296 }  // namespace base