Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / components / url_fixer / url_fixer_unittest.cc
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stdlib.h>
6
7 #include "base/base_paths.h"
8 #include "base/basictypes.h"
9 #include "base/files/file_path.h"
10 #include "base/files/file_util.h"
11 #include "base/files/scoped_temp_dir.h"
12 #include "base/path_service.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "components/url_fixer/url_fixer.h"
16 #include "net/base/filename_util.h"
17 #include "testing/gtest/include/gtest/gtest.h"
18 #include "url/gurl.h"
19 #include "url/url_parse.h"
20
21 namespace url {
22
23 std::ostream& operator<<(std::ostream& os, const Component& part) {
24   return os << "(begin=" << part.begin << ", len=" << part.len << ")";
25 }
26
27 }  // namespace url
28
29 struct SegmentCase {
30   const std::string input;
31   const std::string result;
32   const url::Component scheme;
33   const url::Component username;
34   const url::Component password;
35   const url::Component host;
36   const url::Component port;
37   const url::Component path;
38   const url::Component query;
39   const url::Component ref;
40 };
41
42 static const SegmentCase segment_cases[] = {
43   { "http://www.google.com/", "http",
44     url::Component(0, 4), // scheme
45     url::Component(), // username
46     url::Component(), // password
47     url::Component(7, 14), // host
48     url::Component(), // port
49     url::Component(21, 1), // path
50     url::Component(), // query
51     url::Component(), // ref
52   },
53   { "aBoUt:vErSiOn", "about",
54     url::Component(0, 5), // scheme
55     url::Component(), // username
56     url::Component(), // password
57     url::Component(6, 7), // host
58     url::Component(), // port
59     url::Component(), // path
60     url::Component(), // query
61     url::Component(), // ref
62   },
63   { "about:host/path?query#ref", "about",
64     url::Component(0, 5), // scheme
65     url::Component(), // username
66     url::Component(), // password
67     url::Component(6, 4), // host
68     url::Component(), // port
69     url::Component(10, 5), // path
70     url::Component(16, 5), // query
71     url::Component(22, 3), // ref
72   },
73   { "about://host/path?query#ref", "about",
74     url::Component(0, 5), // scheme
75     url::Component(), // username
76     url::Component(), // password
77     url::Component(8, 4), // host
78     url::Component(), // port
79     url::Component(12, 5), // path
80     url::Component(18, 5), // query
81     url::Component(24, 3), // ref
82   },
83   { "chrome:host/path?query#ref", "chrome",
84     url::Component(0, 6), // scheme
85     url::Component(), // username
86     url::Component(), // password
87     url::Component(7, 4), // host
88     url::Component(), // port
89     url::Component(11, 5), // path
90     url::Component(17, 5), // query
91     url::Component(23, 3), // ref
92   },
93   { "chrome://host/path?query#ref", "chrome",
94     url::Component(0, 6), // scheme
95     url::Component(), // username
96     url::Component(), // password
97     url::Component(9, 4), // host
98     url::Component(), // port
99     url::Component(13, 5), // path
100     url::Component(19, 5), // query
101     url::Component(25, 3), // ref
102   },
103   { "    www.google.com:124?foo#", "http",
104     url::Component(), // scheme
105     url::Component(), // username
106     url::Component(), // password
107     url::Component(4, 14), // host
108     url::Component(19, 3), // port
109     url::Component(), // path
110     url::Component(23, 3), // query
111     url::Component(27, 0), // ref
112   },
113   { "user@www.google.com", "http",
114     url::Component(), // scheme
115     url::Component(0, 4), // username
116     url::Component(), // password
117     url::Component(5, 14), // host
118     url::Component(), // port
119     url::Component(), // path
120     url::Component(), // query
121     url::Component(), // ref
122   },
123   { "ftp:/user:P:a$$Wd@..ftp.google.com...::23///pub?foo#bar", "ftp",
124     url::Component(0, 3), // scheme
125     url::Component(5, 4), // username
126     url::Component(10, 7), // password
127     url::Component(18, 20), // host
128     url::Component(39, 2), // port
129     url::Component(41, 6), // path
130     url::Component(48, 3), // query
131     url::Component(52, 3), // ref
132   },
133   { "[2001:db8::1]/path", "http",
134     url::Component(), // scheme
135     url::Component(), // username
136     url::Component(), // password
137     url::Component(0, 13), // host
138     url::Component(), // port
139     url::Component(13, 5), // path
140     url::Component(), // query
141     url::Component(), // ref
142   },
143   { "[::1]", "http",
144     url::Component(), // scheme
145     url::Component(), // username
146     url::Component(), // password
147     url::Component(0, 5), // host
148     url::Component(), // port
149     url::Component(), // path
150     url::Component(), // query
151     url::Component(), // ref
152   },
153   // Incomplete IPv6 addresses (will not canonicalize).
154   { "[2001:4860:", "http",
155     url::Component(), // scheme
156     url::Component(), // username
157     url::Component(), // password
158     url::Component(0, 11), // host
159     url::Component(), // port
160     url::Component(), // path
161     url::Component(), // query
162     url::Component(), // ref
163   },
164   { "[2001:4860:/foo", "http",
165     url::Component(), // scheme
166     url::Component(), // username
167     url::Component(), // password
168     url::Component(0, 11), // host
169     url::Component(), // port
170     url::Component(11, 4), // path
171     url::Component(), // query
172     url::Component(), // ref
173   },
174   { "http://:b005::68]", "http",
175     url::Component(0, 4), // scheme
176     url::Component(), // username
177     url::Component(), // password
178     url::Component(7, 10), // host
179     url::Component(), // port
180     url::Component(), // path
181     url::Component(), // query
182     url::Component(), // ref
183   },
184   // Can't do anything useful with this.
185   { ":b005::68]", "",
186     url::Component(0, 0), // scheme
187     url::Component(), // username
188     url::Component(), // password
189     url::Component(), // host
190     url::Component(), // port
191     url::Component(), // path
192     url::Component(), // query
193     url::Component(), // ref
194   },
195 };
196
197 typedef testing::Test URLFixerTest;
198
199 TEST(URLFixerTest, SegmentURL) {
200   std::string result;
201   url::Parsed parts;
202
203   for (size_t i = 0; i < arraysize(segment_cases); ++i) {
204     SegmentCase value = segment_cases[i];
205     result = url_fixer::SegmentURL(value.input, &parts);
206     EXPECT_EQ(value.result, result);
207     EXPECT_EQ(value.scheme, parts.scheme);
208     EXPECT_EQ(value.username, parts.username);
209     EXPECT_EQ(value.password, parts.password);
210     EXPECT_EQ(value.host, parts.host);
211     EXPECT_EQ(value.port, parts.port);
212     EXPECT_EQ(value.path, parts.path);
213     EXPECT_EQ(value.query, parts.query);
214     EXPECT_EQ(value.ref, parts.ref);
215   }
216 }
217
218 // Creates a file and returns its full name as well as the decomposed
219 // version. Example:
220 //    full_path = "c:\foo\bar.txt"
221 //    dir = "c:\foo"
222 //    file_name = "bar.txt"
223 static bool MakeTempFile(const base::FilePath& dir,
224                          const base::FilePath& file_name,
225                          base::FilePath* full_path) {
226   *full_path = dir.Append(file_name);
227   return base::WriteFile(*full_path, "", 0) == 0;
228 }
229
230 // Returns true if the given URL is a file: URL that matches the given file
231 static bool IsMatchingFileURL(const std::string& url,
232                               const base::FilePath& full_file_path) {
233   if (url.length() <= 8)
234     return false;
235   if (std::string("file:///") != url.substr(0, 8))
236     return false; // no file:/// prefix
237   if (url.find('\\') != std::string::npos)
238     return false; // contains backslashes
239
240   base::FilePath derived_path;
241   net::FileURLToFilePath(GURL(url), &derived_path);
242
243   return base::FilePath::CompareEqualIgnoreCase(derived_path.value(),
244                                           full_file_path.value());
245 }
246
// Pairs a piece of user-typed text with the URL spec the fixer is expected to
// produce for it.
struct FixupCase {
  const std::string input;   // Text handed to the fixer.
  const std::string output;  // Expected spec of the fixed-up URL.
};

// Inputs that can be checked without any file existing on disk. Used by both
// the FixupURL and FixupRelativeFile tests.
FixupCase fixup_cases[] = {
    {"www.google.com", "http://www.google.com/"},
    {" www.google.com     ", "http://www.google.com/"},
    {" foo.com/asdf  bar", "http://foo.com/asdf%20%20bar"},
    {"..www.google.com..", "http://www.google.com./"},
    {"http://......", "http://....../"},
    {"http://host.com:ninety-two/", "http://host.com:ninety-two/"},
    {"http://host.com:ninety-two?foo", "http://host.com:ninety-two/?foo"},
    {"google.com:123", "http://google.com:123/"},
    {"about:", "chrome://version/"},
    {"about:foo", "chrome://foo/"},
    {"about:version", "chrome://version/"},
    {"about:blank", "about:blank"},
    {"about:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
    {"about://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
    {"chrome:usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
    {"chrome://usr:pwd@hst/pth?qry#ref", "chrome://usr:pwd@hst/pth?qry#ref"},
    {"www:123", "http://www:123/"},
    {"   www:123", "http://www:123/"},
    {"www.google.com?foo", "http://www.google.com/?foo"},
    {"www.google.com#foo", "http://www.google.com/#foo"},
    {"www.google.com?", "http://www.google.com/?"},
    {"www.google.com#", "http://www.google.com/#"},
    {"www.google.com:123?foo#bar", "http://www.google.com:123/?foo#bar"},
    {"user@www.google.com", "http://user@www.google.com/"},
    {"\xE6\xB0\xB4.com", "http://xn--1rw.com/"},
    // It would be better if this next case got treated as http, but I don't see
    // a clean way to guess this isn't the new-and-exciting "user" scheme.
    {"user:passwd@www.google.com:8080/", "user:passwd@www.google.com:8080/"},
    // {"file:///c:/foo/bar%20baz.txt", "file:///C:/foo/bar%20baz.txt"},
    {"ftp.google.com", "ftp://ftp.google.com/"},
    {"    ftp.google.com", "ftp://ftp.google.com/"},
    {"FTP.GooGle.com", "ftp://ftp.google.com/"},
    {"ftpblah.google.com", "http://ftpblah.google.com/"},
    {"ftp", "http://ftp/"},
    {"google.ftp.com", "http://google.ftp.com/"},
    // URLs which end with 0x85 (NEL in ISO-8859).
    {"http://foo.com/s?q=\xd0\x85", "http://foo.com/s?q=%D0%85"},
    {"http://foo.com/s?q=\xec\x97\x85", "http://foo.com/s?q=%EC%97%85"},
    {"http://foo.com/s?q=\xf0\x90\x80\x85", "http://foo.com/s?q=%F0%90%80%85"},
    // URLs which end with 0xA0 (non-break space in ISO-8859).
    {"http://foo.com/s?q=\xd0\xa0", "http://foo.com/s?q=%D0%A0"},
    {"http://foo.com/s?q=\xec\x97\xa0", "http://foo.com/s?q=%EC%97%A0"},
    {"http://foo.com/s?q=\xf0\x90\x80\xa0", "http://foo.com/s?q=%F0%90%80%A0"},
    // URLs containing IPv6 literals.
    {"[2001:db8::2]", "http://[2001:db8::2]/"},
    {"[::]:80", "http://[::]/"},
    {"[::]:80/path", "http://[::]/path"},
    {"[::]:180/path", "http://[::]:180/path"},
    // TODO(pmarks): Maybe we should parse bare IPv6 literals someday.
    {"::1", "::1"},
    // Semicolon as scheme separator for standard schemes.
    {"http;//www.google.com/", "http://www.google.com/"},
    {"about;chrome", "chrome://chrome/"},
    // Semicolon left as-is for non-standard schemes.
    {"whatsup;//fool", "whatsup://fool"},
    // Semicolon left as-is in URL itself.
    {"http://host/port?query;moar", "http://host/port?query;moar"},
    // Fewer slashes than expected.
    {"http;www.google.com/", "http://www.google.com/"},
    {"http;/www.google.com/", "http://www.google.com/"},
    // Semicolon at start.
    {";http://www.google.com/", "http://%3Bhttp//www.google.com/"},
};
314
315 TEST(URLFixerTest, FixupURL) {
316   for (size_t i = 0; i < arraysize(fixup_cases); ++i) {
317     FixupCase value = fixup_cases[i];
318     EXPECT_EQ(value.output,
319               url_fixer::FixupURL(value.input, "").possibly_invalid_spec())
320         << "input: " << value.input;
321   }
322
323   // Check the TLD-appending functionality.
324   FixupCase tld_cases[] = {
325       {"somedomainthatwillnotbeagtld",
326        "http://www.somedomainthatwillnotbeagtld.com/"},
327       {"somedomainthatwillnotbeagtld.",
328        "http://www.somedomainthatwillnotbeagtld.com/"},
329       {"somedomainthatwillnotbeagtld..",
330        "http://www.somedomainthatwillnotbeagtld.com/"},
331       {".somedomainthatwillnotbeagtld",
332        "http://www.somedomainthatwillnotbeagtld.com/"},
333       {"www.somedomainthatwillnotbeagtld",
334        "http://www.somedomainthatwillnotbeagtld.com/"},
335       {"somedomainthatwillnotbeagtld.com",
336        "http://somedomainthatwillnotbeagtld.com/"},
337       {"http://somedomainthatwillnotbeagtld",
338        "http://www.somedomainthatwillnotbeagtld.com/"},
339       {"..somedomainthatwillnotbeagtld..",
340        "http://www.somedomainthatwillnotbeagtld.com/"},
341       {"http://www.somedomainthatwillnotbeagtld",
342        "http://www.somedomainthatwillnotbeagtld.com/"},
343       {"9999999999999999", "http://www.9999999999999999.com/"},
344       {"somedomainthatwillnotbeagtld/foo",
345        "http://www.somedomainthatwillnotbeagtld.com/foo"},
346       {"somedomainthatwillnotbeagtld.com/foo",
347        "http://somedomainthatwillnotbeagtld.com/foo"},
348       {"somedomainthatwillnotbeagtld/?foo=.com",
349        "http://www.somedomainthatwillnotbeagtld.com/?foo=.com"},
350       {"www.somedomainthatwillnotbeagtld/?foo=www.",
351        "http://www.somedomainthatwillnotbeagtld.com/?foo=www."},
352       {"somedomainthatwillnotbeagtld.com/?foo=.com",
353        "http://somedomainthatwillnotbeagtld.com/?foo=.com"},
354       {"http://www.somedomainthatwillnotbeagtld.com",
355        "http://www.somedomainthatwillnotbeagtld.com/"},
356       {"somedomainthatwillnotbeagtld:123",
357        "http://www.somedomainthatwillnotbeagtld.com:123/"},
358       {"http://somedomainthatwillnotbeagtld:123",
359        "http://www.somedomainthatwillnotbeagtld.com:123/"},
360   };
361   for (size_t i = 0; i < arraysize(tld_cases); ++i) {
362     FixupCase value = tld_cases[i];
363     EXPECT_EQ(value.output,
364               url_fixer::FixupURL(value.input, "com").possibly_invalid_spec());
365   }
366 }
367
368 // Test different types of file inputs to URIFixerUpper::FixupURL. This
369 // doesn't go into the nice array of fixups above since the file input
370 // has to exist.
371 TEST(URLFixerTest, FixupFile) {
372   // this "original" filename is the one we tweak to get all the variations
373   base::ScopedTempDir temp_dir_;
374   ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
375   base::FilePath original;
376   ASSERT_TRUE(MakeTempFile(
377       temp_dir_.path(),
378       base::FilePath(FILE_PATH_LITERAL("url fixer upper existing file.txt")),
379       &original));
380
381   // reference path
382   GURL golden(net::FilePathToFileURL(original));
383
384   // c:\foo\bar.txt -> file:///c:/foo/bar.txt (basic)
385   GURL fixedup(url_fixer::FixupURL(original.AsUTF8Unsafe(), std::string()));
386   EXPECT_EQ(golden, fixedup);
387
388   // TODO(port): Make some equivalent tests for posix.
389 #if defined(OS_WIN)
390   // c|/foo\bar.txt -> file:///c:/foo/bar.txt (pipe allowed instead of colon)
391   std::string cur(base::WideToUTF8(original.value()));
392   EXPECT_EQ(':', cur[1]);
393   cur[1] = '|';
394   EXPECT_EQ(golden, url_fixer::FixupURL(cur, std::string()));
395
396   FixupCase cases[] = {
397     {"c:\\Non-existent%20file.txt", "file:///C:/Non-existent%2520file.txt"},
398
399     // \\foo\bar.txt -> file://foo/bar.txt
400     // UNC paths, this file won't exist, but since there are no escapes, it
401     // should be returned just converted to a file: URL.
402     {"\\\\NonexistentHost\\foo\\bar.txt", "file://nonexistenthost/foo/bar.txt"},
403     // We do this strictly, like IE8, which only accepts this form using
404     // backslashes and not forward ones.  Turning "//foo" into "http" matches
405     // Firefox and IE, silly though it may seem (it falls out of adding "http"
406     // as the default protocol if you haven't entered one).
407     {"//NonexistentHost\\foo/bar.txt", "http://nonexistenthost/foo/bar.txt"},
408     {"file:///C:/foo/bar", "file:///C:/foo/bar"},
409
410     // Much of the work here comes from GURL's canonicalization stage.
411     {"file://C:/foo/bar", "file:///C:/foo/bar"},
412     {"file:c:", "file:///C:/"},
413     {"file:c:WINDOWS", "file:///C:/WINDOWS"},
414     {"file:c|Program Files", "file:///C:/Program%20Files"},
415     {"file:/file", "file://file/"},
416     {"file:////////c:\\foo", "file:///C:/foo"},
417     {"file://server/folder/file", "file://server/folder/file"},
418
419     // These are fixups we don't do, but could consider:
420     //   {"file:///foo:/bar", "file://foo/bar"},
421     //   {"file:/\\/server\\folder/file", "file://server/folder/file"},
422   };
423 #elif defined(OS_POSIX)
424
425 #if defined(OS_MACOSX)
426 #define HOME "/Users/"
427 #else
428 #define HOME "/home/"
429 #endif
430   url_fixer::home_directory_override = "/foo";
431   FixupCase cases[] = {
432     // File URLs go through GURL, which tries to escape intelligently.
433     {"/A%20non-existent file.txt", "file:///A%2520non-existent%20file.txt"},
434     // A plain "/" refers to the root.
435     {"/", "file:///"},
436
437     // These rely on the above home_directory_override.
438     {"~", "file:///foo"},
439     {"~/bar", "file:///foo/bar"},
440
441     // References to other users' homedirs.
442     {"~foo", "file://" HOME "foo"},
443     {"~x/blah", "file://" HOME "x/blah"},
444   };
445 #endif
446
447   for (size_t i = 0; i < arraysize(cases); i++) {
448     EXPECT_EQ(cases[i].output,
449               url_fixer::FixupURL(cases[i].input, "").possibly_invalid_spec());
450   }
451
452   EXPECT_TRUE(base::DeleteFile(original, false));
453 }
454
455 TEST(URLFixerTest, FixupRelativeFile) {
456   base::FilePath full_path;
457   base::FilePath file_part(
458       FILE_PATH_LITERAL("url_fixer_upper_existing_file.txt"));
459   base::ScopedTempDir temp_dir_;
460   ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
461   ASSERT_TRUE(MakeTempFile(temp_dir_.path(), file_part, &full_path));
462   full_path = base::MakeAbsoluteFilePath(full_path);
463   ASSERT_FALSE(full_path.empty());
464
465   // make sure we pass through good URLs
466   for (size_t i = 0; i < arraysize(fixup_cases); ++i) {
467     FixupCase value = fixup_cases[i];
468     base::FilePath input = base::FilePath::FromUTF8Unsafe(value.input);
469     EXPECT_EQ(value.output,
470               url_fixer::FixupRelativeFile(temp_dir_.path(),
471                   input).possibly_invalid_spec());
472   }
473
474   // make sure the existing file got fixed-up to a file URL, and that there
475   // are no backslashes
476   EXPECT_TRUE(IsMatchingFileURL(
477       url_fixer::FixupRelativeFile(temp_dir_.path(),
478           file_part).possibly_invalid_spec(), full_path));
479   EXPECT_TRUE(base::DeleteFile(full_path, false));
480
481   // create a filename we know doesn't exist and make sure it doesn't get
482   // fixed up to a file URL
483   base::FilePath nonexistent_file(
484       FILE_PATH_LITERAL("url_fixer_upper_nonexistent_file.txt"));
485   std::string fixedup(url_fixer::FixupRelativeFile(
486       temp_dir_.path(), nonexistent_file).possibly_invalid_spec());
487   EXPECT_NE(std::string("file:///"), fixedup.substr(0, 8));
488   EXPECT_FALSE(IsMatchingFileURL(fixedup, nonexistent_file));
489
490   // make a subdir to make sure relative paths with directories work, also
491   // test spaces:
492   // "app_dir\url fixer-upper dir\url fixer-upper existing file.txt"
493   base::FilePath sub_dir(FILE_PATH_LITERAL("url fixer-upper dir"));
494   base::FilePath sub_file(
495       FILE_PATH_LITERAL("url fixer-upper existing file.txt"));
496   base::FilePath new_dir = temp_dir_.path().Append(sub_dir);
497   base::CreateDirectory(new_dir);
498   ASSERT_TRUE(MakeTempFile(new_dir, sub_file, &full_path));
499   full_path = base::MakeAbsoluteFilePath(full_path);
500   ASSERT_FALSE(full_path.empty());
501
502   // test file in the subdir
503   base::FilePath relative_file = sub_dir.Append(sub_file);
504   EXPECT_TRUE(IsMatchingFileURL(
505       url_fixer::FixupRelativeFile(temp_dir_.path(),
506           relative_file).possibly_invalid_spec(), full_path));
507
508   // test file in the subdir with different slashes and escaping.
509   base::FilePath::StringType relative_file_str = sub_dir.value() +
510       FILE_PATH_LITERAL("/") + sub_file.value();
511   ReplaceSubstringsAfterOffset(&relative_file_str, 0,
512       FILE_PATH_LITERAL(" "), FILE_PATH_LITERAL("%20"));
513   EXPECT_TRUE(IsMatchingFileURL(
514       url_fixer::FixupRelativeFile(temp_dir_.path(),
515           base::FilePath(relative_file_str)).possibly_invalid_spec(),
516               full_path));
517
518   // test relative directories and duplicate slashes
519   // (should resolve to the same file as above)
520   relative_file_str = sub_dir.value() + FILE_PATH_LITERAL("/../") +
521       sub_dir.value() + FILE_PATH_LITERAL("///./") + sub_file.value();
522   EXPECT_TRUE(IsMatchingFileURL(
523       url_fixer::FixupRelativeFile(temp_dir_.path(),
524           base::FilePath(relative_file_str)).possibly_invalid_spec(),
525               full_path));
526
527   // done with the subdir
528   EXPECT_TRUE(base::DeleteFile(full_path, false));
529   EXPECT_TRUE(base::DeleteFile(new_dir, true));
530
531   // Test that an obvious HTTP URL isn't accidentally treated as an absolute
532   // file path (on account of system-specific craziness).
533   base::FilePath empty_path;
534   base::FilePath http_url_path(FILE_PATH_LITERAL("http://../"));
535   EXPECT_TRUE(
536       url_fixer::FixupRelativeFile(empty_path, http_url_path).SchemeIs("http"));
537 }