- add sources.
[platform/framework/web/crosswalk.git] / src / net / base / mime_sniffer_unittest.cc
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/basictypes.h"
6 #include "net/base/mime_sniffer.h"
7 #include "testing/gtest/include/gtest/gtest.h"
8 #include "url/gurl.h"
9
10 namespace net {
11
// One row of a table-driven sniffing test. Rows are written with aggregate
// initialisation, so the member order below must not change.
struct SnifferTest {
  // Bytes handed to the sniffer; may contain embedded NULs, so the length is
  // carried separately in |content_len|.
  const char* content;
  // Number of bytes of |content| to sniff.
  size_t content_len;
  // URL spec; TestArray() wraps it in a GURL before sniffing.
  std::string url;
  // Type hint passed to the sniffer alongside the content.
  std::string type_hint;
  // Mime type the sniffer is expected to report for this row.
  const char* mime_type;
};
19
20 static void TestArray(SnifferTest* tests, size_t count) {
21   std::string mime_type;
22
23   for (size_t i = 0; i < count; ++i) {
24     SniffMimeType(tests[i].content,
25                        tests[i].content_len,
26                        GURL(tests[i].url),
27                        tests[i].type_hint,
28                        &mime_type);
29     EXPECT_EQ(tests[i].mime_type, mime_type);
30   }
31 }
32
33 // TODO(evanm): convert other tests to use SniffMimeType instead of TestArray,
34 // so the error messages produced by test failures are more useful.
35 static std::string SniffMimeType(const std::string& content,
36                                  const std::string& url,
37                                  const std::string& mime_type_hint) {
38   std::string mime_type;
39   SniffMimeType(content.data(), content.size(), GURL(url),
40                      mime_type_hint, &mime_type);
41   return mime_type;
42 }
43
44 TEST(MimeSnifferTest, BoundaryConditionsTest) {
45   std::string mime_type;
46   std::string type_hint;
47
48   char buf[] = {
49     'd', '\x1f', '\xFF'
50   };
51
52   GURL url;
53
54   SniffMimeType(buf, 0, url, type_hint, &mime_type);
55   EXPECT_EQ("text/plain", mime_type);
56   SniffMimeType(buf, 1, url, type_hint, &mime_type);
57   EXPECT_EQ("text/plain", mime_type);
58   SniffMimeType(buf, 2, url, type_hint, &mime_type);
59   EXPECT_EQ("application/octet-stream", mime_type);
60 }
61
62 TEST(MimeSnifferTest, BasicSniffingTest) {
63   SnifferTest tests[] = {
64     { "<!DOCTYPE html PUBLIC", sizeof("<!DOCTYPE html PUBLIC")-1,
65       "http://www.example.com/",
66       "", "text/html" },
67     { "<HtMl><Body></body></htMl>", sizeof("<HtMl><Body></body></htMl>")-1,
68       "http://www.example.com/foo.gif",
69       "application/octet-stream", "application/octet-stream" },
70     { "GIF89a\x1F\x83\x94", sizeof("GIF89a\xAF\x83\x94")-1,
71       "http://www.example.com/foo",
72       "text/plain", "image/gif" },
73     { "Gif87a\x1F\x83\x94", sizeof("Gif87a\xAF\x83\x94")-1,
74       "http://www.example.com/foo?param=tt.gif",
75       "", "application/octet-stream" },
76     { "%!PS-Adobe-3.0", sizeof("%!PS-Adobe-3.0")-1,
77       "http://www.example.com/foo",
78       "text/plain", "text/plain" },
79     { "\x89" "PNG\x0D\x0A\x1A\x0A", sizeof("\x89" "PNG\x0D\x0A\x1A\x0A")-1,
80       "http://www.example.com/foo",
81       "application/octet-stream", "application/octet-stream" },
82     { "\xFF\xD8\xFF\x23\x49\xAF", sizeof("\xFF\xD8\xFF\x23\x49\xAF")-1,
83       "http://www.example.com/foo",
84       "", "image/jpeg" },
85   };
86
87   TestArray(tests, arraysize(tests));
88 }
89
90 TEST(MimeSnifferTest, ChromeExtensionsTest) {
91   SnifferTest tests[] = {
92     // schemes
93     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
94       "http://www.example.com/foo.crx",
95       "", "application/x-chrome-extension" },
96     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
97       "https://www.example.com/foo.crx",
98       "", "application/x-chrome-extension" },
99     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
100       "ftp://www.example.com/foo.crx",
101       "", "application/x-chrome-extension" },
102
103     // some other mimetypes that should get converted
104     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
105       "http://www.example.com/foo.crx",
106       "text/plain", "application/x-chrome-extension" },
107     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
108       "http://www.example.com/foo.crx",
109       "application/octet-stream", "application/x-chrome-extension" },
110
111     // success edge cases
112     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
113       "http://www.example.com/foo.crx?query=string",
114       "", "application/x-chrome-extension" },
115     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
116       "http://www.example.com/foo..crx",
117       "", "application/x-chrome-extension" },
118
119     // wrong file extension
120     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
121       "http://www.example.com/foo.bin",
122       "", "application/octet-stream" },
123     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
124       "http://www.example.com/foo.bin?monkey",
125       "", "application/octet-stream" },
126     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
127       "invalid-url",
128       "", "application/octet-stream" },
129     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
130       "http://www.example.com",
131       "", "application/octet-stream" },
132     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
133       "http://www.example.com/",
134       "", "application/octet-stream" },
135     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
136       "http://www.example.com/foo",
137       "", "application/octet-stream" },
138     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
139       "http://www.example.com/foocrx",
140       "", "application/octet-stream" },
141     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
142       "http://www.example.com/foo.crx.blech",
143       "", "application/octet-stream" },
144
145     // wrong magic
146     { "Cr24\x02\x00\x00\x01", sizeof("Cr24\x02\x00\x00\x01")-1,
147       "http://www.example.com/foo.crx?monkey",
148       "", "application/octet-stream" },
149     { "PADDING_Cr24\x02\x00\x00\x00", sizeof("PADDING_Cr24\x02\x00\x00\x00")-1,
150       "http://www.example.com/foo.crx?monkey",
151       "", "application/octet-stream" },
152   };
153
154   TestArray(tests, arraysize(tests));
155 }
156
157 TEST(MimeSnifferTest, MozillaCompatibleTest) {
158   SnifferTest tests[] = {
159     { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
160       "http://www.example.com/",
161       "", "text/html" },
162     { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
163       "http://www.example.com/",
164       "text/plain", "text/plain" },
165     { "BMjlakdsfk", sizeof("BMjlakdsfk")-1,
166       "http://www.example.com/foo",
167       "", "image/bmp" },
168     { "\x00\x00\x30\x00", sizeof("\x00\x00\x30\x00")-1,
169       "http://www.example.com/favicon.ico",
170       "", "application/octet-stream" },
171     { "#!/bin/sh\nls /\n", sizeof("#!/bin/sh\nls /\n")-1,
172       "http://www.example.com/foo",
173       "", "text/plain" },
174     { "From: Fred\nTo: Bob\n\nHi\n.\n",
175       sizeof("From: Fred\nTo: Bob\n\nHi\n.\n")-1,
176       "http://www.example.com/foo",
177       "", "text/plain" },
178     { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
179       sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
180       "http://www.example.com/foo",
181       "", "text/xml" },
182     { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
183       sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
184       "http://www.example.com/foo",
185       "application/octet-stream", "application/octet-stream" },
186   };
187
188   TestArray(tests, arraysize(tests));
189 }
190
191 TEST(MimeSnifferTest, DontAllowPrivilegeEscalationTest) {
192   SnifferTest tests[] = {
193     { "GIF87a\n<html>\n<body>"
194         "<script>alert('haxorzed');\n</script>"
195         "</body></html>\n",
196       sizeof("GIF87a\n<html>\n<body>"
197         "<script>alert('haxorzed');\n</script>"
198         "</body></html>\n")-1,
199       "http://www.example.com/foo",
200       "", "image/gif" },
201     { "GIF87a\n<html>\n<body>"
202         "<script>alert('haxorzed');\n</script>"
203         "</body></html>\n",
204       sizeof("GIF87a\n<html>\n<body>"
205         "<script>alert('haxorzed');\n</script>"
206         "</body></html>\n")-1,
207       "http://www.example.com/foo?q=ttt.html",
208       "", "image/gif" },
209     { "GIF87a\n<html>\n<body>"
210         "<script>alert('haxorzed');\n</script>"
211         "</body></html>\n",
212       sizeof("GIF87a\n<html>\n<body>"
213         "<script>alert('haxorzed');\n</script>"
214         "</body></html>\n")-1,
215       "http://www.example.com/foo#ttt.html",
216       "", "image/gif" },
217     { "a\n<html>\n<body>"
218         "<script>alert('haxorzed');\n</script>"
219         "</body></html>\n",
220       sizeof("a\n<html>\n<body>"
221         "<script>alert('haxorzed');\n</script>"
222         "</body></html>\n")-1,
223       "http://www.example.com/foo",
224       "", "text/plain" },
225     { "a\n<html>\n<body>"
226         "<script>alert('haxorzed');\n</script>"
227         "</body></html>\n",
228       sizeof("a\n<html>\n<body>"
229         "<script>alert('haxorzed');\n</script>"
230         "</body></html>\n")-1,
231       "http://www.example.com/foo?q=ttt.html",
232       "", "text/plain" },
233     { "a\n<html>\n<body>"
234         "<script>alert('haxorzed');\n</script>"
235         "</body></html>\n",
236       sizeof("a\n<html>\n<body>"
237         "<script>alert('haxorzed');\n</script>"
238         "</body></html>\n")-1,
239       "http://www.example.com/foo#ttt.html",
240       "", "text/plain" },
241     { "a\n<html>\n<body>"
242         "<script>alert('haxorzed');\n</script>"
243         "</body></html>\n",
244       sizeof("a\n<html>\n<body>"
245         "<script>alert('haxorzed');\n</script>"
246         "</body></html>\n")-1,
247       "http://www.example.com/foo.html",
248       "", "text/plain" },
249   };
250
251   TestArray(tests, arraysize(tests));
252 }
253
254 TEST(MimeSnifferTest, UnicodeTest) {
255   SnifferTest tests[] = {
256     { "\xEF\xBB\xBF" "Hi there", sizeof("\xEF\xBB\xBF" "Hi there")-1,
257       "http://www.example.com/foo",
258       "", "text/plain" },
259     { "\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79",
260       sizeof("\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79")-1,
261       "http://www.example.com/foo",
262       "", "text/plain" },
263     { "\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9",
264       sizeof("\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9")-1,
265       "http://www.example.com/foo",
266       "", "text/plain" },
267     { "\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01",
268       sizeof("\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01")-1,
269       "http://www.example.com/foo",
270       "", "text/plain" },
271   };
272
273   TestArray(tests, arraysize(tests));
274 }
275
276 TEST(MimeSnifferTest, FlashTest) {
277   SnifferTest tests[] = {
278     { "CWSdd\x00\xB3", sizeof("CWSdd\x00\xB3")-1,
279       "http://www.example.com/foo",
280       "", "application/octet-stream" },
281     { "FLVjdkl*(#)0sdj\x00", sizeof("FLVjdkl*(#)0sdj\x00")-1,
282       "http://www.example.com/foo?q=ttt.swf",
283       "", "application/octet-stream" },
284     { "FWS3$9\r\b\x00", sizeof("FWS3$9\r\b\x00")-1,
285       "http://www.example.com/foo#ttt.swf",
286       "", "application/octet-stream" },
287     { "FLVjdkl*(#)0sdj", sizeof("FLVjdkl*(#)0sdj")-1,
288       "http://www.example.com/foo.swf",
289       "", "text/plain" },
290     { "FLVjdkl*(#)0s\x01dj", sizeof("FLVjdkl*(#)0s\x01dj")-1,
291       "http://www.example.com/foo/bar.swf",
292       "", "application/octet-stream" },
293     { "FWS3$9\r\b\x1A", sizeof("FWS3$9\r\b\x1A")-1,
294       "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
295       "", "application/octet-stream" },
296     { "FWS3$9\r\x1C\b", sizeof("FWS3$9\r\x1C\b")-1,
297       "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
298       "text/plain", "application/octet-stream" },
299   };
300
301   TestArray(tests, arraysize(tests));
302 }
303
304 TEST(MimeSnifferTest, XMLTest) {
305   // An easy feed to identify.
306   EXPECT_EQ("application/atom+xml",
307             SniffMimeType("<?xml?><feed", std::string(), "text/xml"));
308   // Don't sniff out of plain text.
309   EXPECT_EQ("text/plain",
310             SniffMimeType("<?xml?><feed", std::string(), "text/plain"));
311   // Simple RSS.
312   EXPECT_EQ("application/rss+xml",
313             SniffMimeType(
314                 "<?xml version='1.0'?>\r\n<rss", std::string(), "text/xml"));
315
316   // The top of CNN's RSS feed, which we'd like to recognize as RSS.
317   static const char kCNNRSS[] =
318       "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
319       "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/rss2full.xsl\" "
320       "type=\"text/xsl\" media=\"screen\"?>"
321       "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/itemcontent.css\" "
322       "type=\"text/css\" media=\"screen\"?>"
323       "<rss xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" "
324       "version=\"2.0\">";
325   // CNN's RSS
326   EXPECT_EQ("application/rss+xml",
327             SniffMimeType(kCNNRSS, std::string(), "text/xml"));
328   EXPECT_EQ("text/plain", SniffMimeType(kCNNRSS, std::string(), "text/plain"));
329
330   // Don't sniff random XML as something different.
331   EXPECT_EQ("text/xml",
332             SniffMimeType("<?xml?><notafeed", std::string(), "text/xml"));
333   // Don't sniff random plain-text as something different.
334   EXPECT_EQ("text/plain",
335             SniffMimeType("<?xml?><notafeed", std::string(), "text/plain"));
336
337   // Positive test for the two instances we upgrade to XHTML.
338   EXPECT_EQ("application/xhtml+xml",
339             SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
340                           std::string(),
341                           "text/xml"));
342   EXPECT_EQ("application/xhtml+xml",
343             SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
344                           std::string(),
345                           "application/xml"));
346
347   // Following our behavior with HTML, don't call other mime types XHTML.
348   EXPECT_EQ("text/plain",
349             SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
350                           std::string(),
351                           "text/plain"));
352   EXPECT_EQ("application/rss+xml",
353             SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
354                           std::string(),
355                           "application/rss+xml"));
356
357   // Don't sniff other HTML-looking bits as HTML.
358   EXPECT_EQ("text/xml",
359             SniffMimeType("<html><head>", std::string(), "text/xml"));
360   EXPECT_EQ("text/xml",
361             SniffMimeType("<foo><html xmlns=\"http://www.w3.org/1999/xhtml\">",
362                           std::string(),
363                           "text/xml"));
364 }
365
366 // Test content which is >= 1024 bytes, and includes no open angle bracket.
367 // http://code.google.com/p/chromium/issues/detail?id=3521
368 TEST(MimeSnifferTest, XMLTestLargeNoAngledBracket) {
369   // Make a large input, with 1024 bytes of "x".
370   std::string content;
371   content.resize(1024);
372   std::fill(content.begin(), content.end(), 'x');
373
374   // content.size() >= 1024 so the sniff is unambiguous.
375   std::string mime_type;
376   EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(),
377                             "text/xml", &mime_type));
378   EXPECT_EQ("text/xml", mime_type);
379 }
380
381 // Test content which is >= 1024 bytes, and includes a binary looking byte.
382 // http://code.google.com/p/chromium/issues/detail?id=15314
383 TEST(MimeSnifferTest, LooksBinary) {
384   // Make a large input, with 1024 bytes of "x" and 1 byte of 0x01.
385   std::string content;
386   content.resize(1024);
387   std::fill(content.begin(), content.end(), 'x');
388   content[1000] = 0x01;
389
390   // content.size() >= 1024 so the sniff is unambiguous.
391   std::string mime_type;
392   EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(),
393                             "text/plain", &mime_type));
394   EXPECT_EQ("application/octet-stream", mime_type);
395 }
396
397 TEST(MimeSnifferTest, OfficeTest) {
398   SnifferTest tests[] = {
399     // Check for URLs incorrectly reported as Microsoft Office files.
400     { "Hi there",
401       sizeof("Hi there")-1,
402       "http://www.example.com/foo.doc",
403       "application/msword", "application/octet-stream" },
404     { "Hi there",
405       sizeof("Hi there")-1,
406       "http://www.example.com/foo.xls",
407       "application/vnd.ms-excel", "application/octet-stream" },
408     { "Hi there",
409       sizeof("Hi there")-1,
410       "http://www.example.com/foo.ppt",
411       "application/vnd.ms-powerpoint", "application/octet-stream" },
412     // Check for Microsoft Office files incorrectly reported as text.
413     { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
414       sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
415       "http://www.example.com/foo.doc",
416       "text/plain", "application/msword" },
417     { "PK\x03\x04" "Hi there",
418       sizeof("PK\x03\x04" "Hi there")-1,
419       "http://www.example.com/foo.doc",
420       "text/plain",
421       "application/vnd.openxmlformats-officedocument."
422       "wordprocessingml.document" },
423     { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
424       sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
425       "http://www.example.com/foo.xls",
426       "text/plain", "application/vnd.ms-excel" },
427     { "PK\x03\x04" "Hi there",
428       sizeof("PK\x03\x04" "Hi there")-1,
429       "http://www.example.com/foo.xls",
430       "text/plain",
431       "application/vnd.openxmlformats-officedocument."
432       "spreadsheetml.sheet" },
433     { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
434       sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
435       "http://www.example.com/foo.ppt",
436       "text/plain", "application/vnd.ms-powerpoint" },
437     { "PK\x03\x04" "Hi there",
438       sizeof("PK\x03\x04" "Hi there")-1,
439       "http://www.example.com/foo.ppt",
440       "text/plain",
441       "application/vnd.openxmlformats-officedocument."
442       "presentationml.presentation" },
443   };
444
445   TestArray(tests, arraysize(tests));
446 }
447
448 // TODO(thestig) Add more tests for other AV formats. Add another test case for
449 // RAW images.
450 TEST(MimeSnifferTest, AudioVideoTest) {
451   std::string mime_type;
452   const char kFlacTestData[] =
453       "fLaC\x00\x00\x00\x22\x12\x00\x12\x00\x00\x00\x00\x00";
454   EXPECT_TRUE(SniffMimeTypeFromLocalData(kFlacTestData,
455                                          sizeof(kFlacTestData),
456                                          &mime_type));
457   EXPECT_EQ("audio/x-flac", mime_type);
458   mime_type.clear();
459
460   const char kWMATestData[] =
461       "\x30\x26\xb2\x75\x8e\x66\xcf\x11\xa6\xd9\x00\xaa\x00\x62\xce\x6c";
462   EXPECT_TRUE(SniffMimeTypeFromLocalData(kWMATestData,
463                                          sizeof(kWMATestData),
464                                          &mime_type));
465   EXPECT_EQ("video/x-ms-asf", mime_type);
466   mime_type.clear();
467
468   // mp4a, m4b, m4p, and alac extension files which share the same container
469   // format.
470   const char kMP4TestData[] =
471       "\x00\x00\x00\x20\x66\x74\x79\x70\x4d\x34\x41\x20\x00\x00\x00\x00";
472   EXPECT_TRUE(SniffMimeTypeFromLocalData(kMP4TestData,
473                                          sizeof(kMP4TestData),
474                                          &mime_type));
475   EXPECT_EQ("video/mp4", mime_type);
476   mime_type.clear();
477
478   const char kAACTestData[] =
479       "\xff\xf1\x50\x80\x02\x20\xb0\x23\x0a\x83\x20\x7d\x61\x90\x3e\xb1";
480   EXPECT_TRUE(SniffMimeTypeFromLocalData(kAACTestData,
481                                          sizeof(kAACTestData),
482                                          &mime_type));
483   EXPECT_EQ("audio/mpeg", mime_type);
484   mime_type.clear();
485 }
486
487 }  // namespace net