Upstream version 7.36.149.0
[platform/framework/web/crosswalk.git] / src / net / base / escape_unittest.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <string>
7
8 #include "net/base/escape.h"
9
10 #include "base/basictypes.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/stringprintf.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15
16 namespace net {
17 namespace {
18
// One escaping expectation: |input| is the raw text handed to the escaping
// function under test, |output| is the escaped text it is expected to return.
struct EscapeCase {
  const char* input;   // Text to escape.
  const char* output;  // Expected escaped form of |input|.
};
23
24 struct UnescapeURLCase {
25   const wchar_t* input;
26   UnescapeRule::Type rules;
27   const wchar_t* output;
28 };
29
30 struct UnescapeURLCaseASCII {
31   const char* input;
32   UnescapeRule::Type rules;
33   const char* output;
34 };
35
// One expectation row for the UnescapeAndDecodeUTF8URLComponent test. The same
// |input| is run through three calls and compared against one field each.
struct UnescapeAndDecodeCase {
  // Escaped text fed to every call.
  const char* input;

  // Expected result of UnescapeURLComponent(input, UnescapeRule::NORMAL).
  const char* url_unescaped;

  // Expected result of
  // UnescapeURLComponent(input, UnescapeRule::REPLACE_PLUS_WITH_SPACE).
  const char* query_unescaped;

  // Expected result of
  // UnescapeAndDecodeUTF8URLComponent(input, UnescapeRule::NORMAL); the test
  // converts this with base::WideToUTF16() before comparing.
  const wchar_t* decoded;
};
48
// One offset-adjustment expectation: after |input| is unescaped and decoded,
// an offset of |input_offset| into the original text is expected to be
// adjusted to |output_offset|. The table that uses this struct stores
// std::string::npos in |output_offset| for some rows.
struct AdjustOffsetCase {
  const char* input;     // Escaped text.
  size_t input_offset;   // Offset into |input| before adjustment.
  size_t output_offset;  // Expected offset after adjustment.
};
54
// One HTML escaping/unescaping expectation. Shared by the EscapeForHTML and
// UnescapeForHTML tests: |input| goes into the function under test and the
// result is compared against |expected_output|.
struct EscapeForHTMLCase {
  const char* input;            // Text passed to the function under test.
  const char* expected_output;  // Text the function should return.
};
59
60 TEST(EscapeTest, EscapeTextForFormSubmission) {
61   const EscapeCase escape_cases[] = {
62     {"foo", "foo"},
63     {"foo bar", "foo+bar"},
64     {"foo++", "foo%2B%2B"}
65   };
66   for (size_t i = 0; i < arraysize(escape_cases); ++i) {
67     EscapeCase value = escape_cases[i];
68     EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, true));
69   }
70
71   const EscapeCase escape_cases_no_plus[] = {
72     {"foo", "foo"},
73     {"foo bar", "foo%20bar"},
74     {"foo++", "foo%2B%2B"}
75   };
76   for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) {
77     EscapeCase value = escape_cases_no_plus[i];
78     EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, false));
79   }
80
81   // Test all the values in we're supposed to be escaping.
82   const std::string no_escape(
83     "abcdefghijklmnopqrstuvwxyz"
84     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
85     "0123456789"
86     "!'()*-._~");
87   for (int i = 0; i < 256; ++i) {
88     std::string in;
89     in.push_back(i);
90     std::string out = EscapeQueryParamValue(in, true);
91     if (0 == i) {
92       EXPECT_EQ(out, std::string("%00"));
93     } else if (32 == i) {
94       // Spaces are plus escaped like web forms.
95       EXPECT_EQ(out, std::string("+"));
96     } else if (no_escape.find(in) == std::string::npos) {
97       // Check %hex escaping
98       std::string expected = base::StringPrintf("%%%02X", i);
99       EXPECT_EQ(expected, out);
100     } else {
101       // No change for things in the no_escape list.
102       EXPECT_EQ(out, in);
103     }
104   }
105 }
106
107 TEST(EscapeTest, EscapePath) {
108   ASSERT_EQ(
109     // Most of the character space we care about, un-escaped
110     EscapePath(
111       "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
112       "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
113       "[\\]^_`abcdefghijklmnopqrstuvwxyz"
114       "{|}~\x7f\x80\xff"),
115     // Escaped
116     "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
117     "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
118     "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
119     "%7B%7C%7D~%7F%80%FF");
120 }
121
122 TEST(EscapeTest, DataURLWithAccentedCharacters) {
123   const std::string url =
124       "text/html;charset=utf-8,%3Chtml%3E%3Cbody%3ETonton,%20ton%20th%C3"
125       "%A9%20t'a-t-il%20%C3%B4t%C3%A9%20ta%20toux%20";
126
127   base::OffsetAdjuster::Adjustments adjustments;
128   net::UnescapeAndDecodeUTF8URLComponentWithAdjustments(
129       url, UnescapeRule::SPACES, &adjustments);
130 }
131
132 TEST(EscapeTest, EscapeUrlEncodedData) {
133   ASSERT_EQ(
134     // Most of the character space we care about, un-escaped
135     EscapeUrlEncodedData(
136       "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
137       "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
138       "[\\]^_`abcdefghijklmnopqrstuvwxyz"
139       "{|}~\x7f\x80\xff", true),
140     // Escaped
141     "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
142     "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
143     "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
144     "%7B%7C%7D~%7F%80%FF");
145 }
146
147 TEST(EscapeTest, EscapeUrlEncodedDataSpace) {
148   ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b");
149   ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b");
150 }
151
152 TEST(EscapeTest, UnescapeURLComponentASCII) {
153   const UnescapeURLCaseASCII unescape_cases[] = {
154     {"", UnescapeRule::NORMAL, ""},
155     {"%2", UnescapeRule::NORMAL, "%2"},
156     {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
157     {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
158     {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
159     {"Some%20random text %25%2dOK", UnescapeRule::NONE,
160      "Some%20random text %25%2dOK"},
161     {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
162      "Some%20random text %25-OK"},
163     {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
164      "Some random text %25-OK"},
165     {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
166      "Some%20random text %-OK"},
167     {"Some%20random text %25%2dOK",
168      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
169      "Some random text %-OK"},
170     {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
171     {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
172     // Certain URL-sensitive characters should not be unescaped unless asked.
173     {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
174      "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
175     {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
176      UnescapeRule::URL_SPECIAL_CHARS,
177      "Hello%20%13%10world ## ?? == && %% ++"},
178     // We can neither escape nor unescape '@' since some websites expect it to
179     // be preserved as either '@' or "%40".
180     // See http://b/996720 and http://crbug.com/23933 .
181     {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"},
182     // Control characters.
183     {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
184      "%01%02%03%04%05%06%07%08%09 %"},
185     {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
186      "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
187     {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
188     {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
189   };
190
191   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
192     std::string str(unescape_cases[i].input);
193     EXPECT_EQ(std::string(unescape_cases[i].output),
194               UnescapeURLComponent(str, unescape_cases[i].rules));
195   }
196
197   // Test the NULL character unescaping (which wouldn't work above since those
198   // are just char pointers).
199   std::string input("Null");
200   input.push_back(0);  // Also have a NULL in the input.
201   input.append("%00%39Test");
202
203   // When we're unescaping NULLs
204   std::string expected("Null");
205   expected.push_back(0);
206   expected.push_back(0);
207   expected.append("9Test");
208   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
209
210   // When we're not unescaping NULLs.
211   expected = "Null";
212   expected.push_back(0);
213   expected.append("%009Test");
214   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
215 }
216
217 TEST(EscapeTest, UnescapeURLComponent) {
218   const UnescapeURLCase unescape_cases[] = {
219     {L"", UnescapeRule::NORMAL, L""},
220     {L"%2", UnescapeRule::NORMAL, L"%2"},
221     {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"},
222     {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"},
223     {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"},
224     {L"Some%20random text %25%2dOK", UnescapeRule::NONE,
225      L"Some%20random text %25%2dOK"},
226     {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
227      L"Some%20random text %25-OK"},
228     {L"Some%20random text %25%E2%80", UnescapeRule::NORMAL,
229      L"Some%20random text %25\xE2\x80"},
230     {L"Some%20random text %25%E2%80OK", UnescapeRule::NORMAL,
231      L"Some%20random text %25\xE2\x80OK"},
232     {L"Some%20random text %25%E2%80%84OK", UnescapeRule::NORMAL,
233      L"Some%20random text %25\xE2\x80\x84OK"},
234
235     // BiDi Control characters should not be unescaped.
236     {L"Some%20random text %25%D8%9COK", UnescapeRule::NORMAL,
237      L"Some%20random text %25%D8%9COK"},
238     {L"Some%20random text %25%E2%80%8EOK", UnescapeRule::NORMAL,
239      L"Some%20random text %25%E2%80%8EOK"},
240     {L"Some%20random text %25%E2%80%8FOK", UnescapeRule::NORMAL,
241      L"Some%20random text %25%E2%80%8FOK"},
242     {L"Some%20random text %25%E2%80%AAOK", UnescapeRule::NORMAL,
243      L"Some%20random text %25%E2%80%AAOK"},
244     {L"Some%20random text %25%E2%80%ABOK", UnescapeRule::NORMAL,
245      L"Some%20random text %25%E2%80%ABOK"},
246     {L"Some%20random text %25%E2%80%AEOK", UnescapeRule::NORMAL,
247      L"Some%20random text %25%E2%80%AEOK"},
248     {L"Some%20random text %25%E2%81%A6OK", UnescapeRule::NORMAL,
249      L"Some%20random text %25%E2%81%A6OK"},
250     {L"Some%20random text %25%E2%81%A9OK", UnescapeRule::NORMAL,
251      L"Some%20random text %25%E2%81%A9OK"},
252
253     {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
254      L"Some random text %25-OK"},
255     {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
256      L"Some%20random text %-OK"},
257     {L"Some%20random text %25%2dOK",
258      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
259      L"Some random text %-OK"},
260     {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"},
261     {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"},
262     // Certain URL-sensitive characters should not be unescaped unless asked.
263     {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
264      L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
265     {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
266      UnescapeRule::URL_SPECIAL_CHARS,
267      L"Hello%20%13%10world ## ?? == && %% ++"},
268     // We can neither escape nor unescape '@' since some websites expect it to
269     // be preserved as either '@' or "%40".
270     // See http://b/996720 and http://crbug.com/23933 .
271     {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"},
272     // Control characters.
273     {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
274      L"%01%02%03%04%05%06%07%08%09 %"},
275     {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
276      L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
277     {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"},
278     {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS,
279      L"Hello%20\x13\x10\x02"},
280     {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS,
281      L"Hello\x9824\x9827"},
282   };
283
284   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
285     base::string16 str(base::WideToUTF16(unescape_cases[i].input));
286     EXPECT_EQ(base::WideToUTF16(unescape_cases[i].output),
287               UnescapeURLComponent(str, unescape_cases[i].rules));
288   }
289
290   // Test the NULL character unescaping (which wouldn't work above since those
291   // are just char pointers).
292   base::string16 input(base::WideToUTF16(L"Null"));
293   input.push_back(0);  // Also have a NULL in the input.
294   input.append(base::WideToUTF16(L"%00%39Test"));
295
296   // When we're unescaping NULLs
297   base::string16 expected(base::WideToUTF16(L"Null"));
298   expected.push_back(0);
299   expected.push_back(0);
300   expected.append(base::ASCIIToUTF16("9Test"));
301   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
302
303   // When we're not unescaping NULLs.
304   expected = base::WideToUTF16(L"Null");
305   expected.push_back(0);
306   expected.append(base::WideToUTF16(L"%009Test"));
307   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
308 }
309
310 TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
311   const UnescapeAndDecodeCase unescape_cases[] = {
312     { "%",
313       "%",
314       "%",
315      L"%"},
316     { "+",
317       "+",
318       " ",
319      L"+"},
320     { "%2+",
321       "%2+",
322       "%2 ",
323      L"%2+"},
324     { "+%%%+%%%",
325       "+%%%+%%%",
326       " %%% %%%",
327      L"+%%%+%%%"},
328     { "Don't escape anything",
329       "Don't escape anything",
330       "Don't escape anything",
331      L"Don't escape anything"},
332     { "+Invalid %escape %2+",
333       "+Invalid %escape %2+",
334       " Invalid %escape %2 ",
335      L"+Invalid %escape %2+"},
336     { "Some random text %25%2dOK",
337       "Some random text %25-OK",
338       "Some random text %25-OK",
339      L"Some random text %25-OK"},
340     { "%01%02%03%04%05%06%07%08%09",
341       "%01%02%03%04%05%06%07%08%09",
342       "%01%02%03%04%05%06%07%08%09",
343      L"%01%02%03%04%05%06%07%08%09"},
344     { "%E4%BD%A0+%E5%A5%BD",
345       "\xE4\xBD\xA0+\xE5\xA5\xBD",
346       "\xE4\xBD\xA0 \xE5\xA5\xBD",
347      L"\x4f60+\x597d"},
348     { "%ED%ED",  // Invalid UTF-8.
349       "\xED\xED",
350       "\xED\xED",
351      L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
352   };
353
354   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
355     std::string unescaped = UnescapeURLComponent(unescape_cases[i].input,
356                                                  UnescapeRule::NORMAL);
357     EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped);
358
359     unescaped = UnescapeURLComponent(unescape_cases[i].input,
360                                      UnescapeRule::REPLACE_PLUS_WITH_SPACE);
361     EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped);
362
363     // TODO: Need to test unescape_spaces and unescape_percent.
364     base::string16 decoded = UnescapeAndDecodeUTF8URLComponent(
365         unescape_cases[i].input, UnescapeRule::NORMAL);
366     EXPECT_EQ(base::WideToUTF16(unescape_cases[i].decoded), decoded);
367   }
368 }
369
370 TEST(EscapeTest, AdjustOffset) {
371   const AdjustOffsetCase adjust_cases[] = {
372     {"", 0, 0},
373     {"test", 0, 0},
374     {"test", 2, 2},
375     {"test", 4, 4},
376     {"test", std::string::npos, std::string::npos},
377     {"%2dtest", 6, 4},
378     {"%2dtest", 3, 1},
379     {"%2dtest", 2, std::string::npos},
380     {"%2dtest", 1, std::string::npos},
381     {"%2dtest", 0, 0},
382     {"test%2d", 2, 2},
383     {"%E4%BD%A0+%E5%A5%BD", 9, 1},
384     {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos},
385     {"%E4%BD%A0+%E5%A5%BD", 0, 0},
386     {"%E4%BD%A0+%E5%A5%BD", 10, 2},
387     {"%E4%BD%A0+%E5%A5%BD", 19, 3},
388
389     {"hi%41test%E4%BD%A0+%E5%A5%BD", 18, 8},
390     {"hi%41test%E4%BD%A0+%E5%A5%BD", 15, std::string::npos},
391     {"hi%41test%E4%BD%A0+%E5%A5%BD", 9, 7},
392     {"hi%41test%E4%BD%A0+%E5%A5%BD", 19, 9},
393     {"hi%41test%E4%BD%A0+%E5%A5%BD", 28, 10},
394     {"hi%41test%E4%BD%A0+%E5%A5%BD", 0, 0},
395     {"hi%41test%E4%BD%A0+%E5%A5%BD", 2, 2},
396     {"hi%41test%E4%BD%A0+%E5%A5%BD", 3, std::string::npos},
397     {"hi%41test%E4%BD%A0+%E5%A5%BD", 5, 3},
398
399     {"%E4%BD%A0+%E5%A5%BDhi%41test", 9, 1},
400     {"%E4%BD%A0+%E5%A5%BDhi%41test", 6, std::string::npos},
401     {"%E4%BD%A0+%E5%A5%BDhi%41test", 0, 0},
402     {"%E4%BD%A0+%E5%A5%BDhi%41test", 10, 2},
403     {"%E4%BD%A0+%E5%A5%BDhi%41test", 19, 3},
404     {"%E4%BD%A0+%E5%A5%BDhi%41test", 21, 5},
405     {"%E4%BD%A0+%E5%A5%BDhi%41test", 22, std::string::npos},
406     {"%E4%BD%A0+%E5%A5%BDhi%41test", 24, 6},
407     {"%E4%BD%A0+%E5%A5%BDhi%41test", 28, 10},
408
409     {"%ED%B0%80+%E5%A5%BD", 6, 6},  // not convertable to UTF-8
410   };
411
412   for (size_t i = 0; i < arraysize(adjust_cases); i++) {
413     size_t offset = adjust_cases[i].input_offset;
414     base::OffsetAdjuster::Adjustments adjustments;
415     UnescapeAndDecodeUTF8URLComponentWithAdjustments(
416         adjust_cases[i].input, UnescapeRule::NORMAL, &adjustments);
417     base::OffsetAdjuster::AdjustOffset(adjustments, &offset);
418     EXPECT_EQ(adjust_cases[i].output_offset, offset)
419         << "input=" << adjust_cases[i].input
420         << " offset=" << adjust_cases[i].input_offset;
421   }
422 }
423
424 TEST(EscapeTest, EscapeForHTML) {
425   const EscapeForHTMLCase tests[] = {
426     { "hello", "hello" },
427     { "<hello>", "&lt;hello&gt;" },
428     { "don\'t mess with me", "don&#39;t mess with me" },
429   };
430   for (size_t i = 0; i < arraysize(tests); ++i) {
431     std::string result = EscapeForHTML(std::string(tests[i].input));
432     EXPECT_EQ(std::string(tests[i].expected_output), result);
433   }
434 }
435
436 TEST(EscapeTest, UnescapeForHTML) {
437   const EscapeForHTMLCase tests[] = {
438     { "", "" },
439     { "&lt;hello&gt;", "<hello>" },
440     { "don&#39;t mess with me", "don\'t mess with me" },
441     { "&lt;&gt;&amp;&quot;&#39;", "<>&\"'" },
442     { "& lt; &amp ; &; '", "& lt; &amp ; &; '" },
443     { "&amp;", "&" },
444     { "&quot;", "\"" },
445     { "&#39;", "'" },
446     { "&lt;", "<" },
447     { "&gt;", ">" },
448     { "&amp; &", "& &" },
449   };
450   for (size_t i = 0; i < arraysize(tests); ++i) {
451     base::string16 result = UnescapeForHTML(base::ASCIIToUTF16(tests[i].input));
452     EXPECT_EQ(base::ASCIIToUTF16(tests[i].expected_output), result);
453   }
454 }
455
456
457 }  // namespace
458 }  // namespace net