Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / components / query_parser / query_parser_unittest.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/basictypes.h"
6 #include "base/memory/scoped_vector.h"
7 #include "base/strings/utf_string_conversions.h"
8 #include "components/query_parser/query_parser.h"
9 #include "testing/gtest/include/gtest/gtest.h"
10
11 namespace query_parser {
12
13 class QueryParserTest : public testing::Test {
14  public:
15   struct TestData {
16     const char* input;
17     const int expected_word_count;
18   };
19
20   std::string QueryToString(const std::string& query);
21
22  protected:
23   QueryParser query_parser_;
24 };
25
26 // Test helper: Convert a user query string in 8-bit (for hardcoding
27 // convenience) to a SQLite query string.
28 std::string QueryParserTest::QueryToString(const std::string& query) {
29   base::string16 sqlite_query;
30   query_parser_.ParseQuery(base::UTF8ToUTF16(query),
31                            MatchingAlgorithm::DEFAULT,
32                            &sqlite_query);
33   return base::UTF16ToUTF8(sqlite_query);
34 }
35
36 // Basic multi-word queries, including prefix matching.
37 TEST_F(QueryParserTest, SimpleQueries) {
38   EXPECT_EQ("", QueryToString(" "));
39   EXPECT_EQ("singleword*", QueryToString("singleword"));
40   EXPECT_EQ("spacedout*", QueryToString("  spacedout "));
41   EXPECT_EQ("foo* bar*", QueryToString("foo bar"));
42   // Short words aren't prefix matches. For Korean Hangul
43   // the minimum is 2 while for other scripts, it's 3.
44   EXPECT_EQ("f b", QueryToString(" f b"));
45   // KA JANG
46   EXPECT_EQ(base::WideToUTF8(L"\xAC00 \xC7A5"),
47             QueryToString(base::WideToUTF8(L" \xAC00 \xC7A5")));
48   EXPECT_EQ("foo* bar*", QueryToString(" foo   bar "));
49   // KA-JANG BICH-GO
50   EXPECT_EQ(base::WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"),
51             QueryToString(base::WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0")));
52 }
53
54 // Quoted substring parsing.
55 TEST_F(QueryParserTest, Quoted) {
56   // ASCII quotes
57   EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\""));
58   // Missing end quotes
59   EXPECT_EQ("\"miss end\"", QueryToString("\"miss end"));
60   // Missing begin quotes
61   EXPECT_EQ("miss* beg*", QueryToString("miss beg\""));
62   // Weird formatting
63   EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many   \"\"quotes"));
64 }
65
66 // Apostrophes within words should be preserved, but otherwise stripped.
67 TEST_F(QueryParserTest, Apostrophes) {
68   EXPECT_EQ("foo* bar's*", QueryToString("foo bar's"));
69   EXPECT_EQ("l'foo*", QueryToString("l'foo"));
70   EXPECT_EQ("foo*", QueryToString("'foo"));
71 }
72
73 // Special characters.
74 TEST_F(QueryParserTest, SpecialChars) {
75   EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar"));
76 }
77
78 TEST_F(QueryParserTest, NumWords) {
79   TestData data[] = {
80     { "blah",                  1 },
81     { "foo \"bar baz\"",       3 },
82     { "foo \"baz\"",           2 },
83     { "foo \"bar baz\"  blah", 4 },
84   };
85
86   for (size_t i = 0; i < arraysize(data); ++i) {
87     base::string16 query_string;
88     EXPECT_EQ(data[i].expected_word_count,
89               query_parser_.ParseQuery(base::UTF8ToUTF16(data[i].input),
90                                        MatchingAlgorithm::DEFAULT,
91                                        &query_string));
92   }
93 }
94
95 TEST_F(QueryParserTest, ParseQueryNodesAndMatch) {
96   struct TestData2 {
97     const std::string query;
98     const std::string text;
99     const bool matches;
100     const size_t m1_start;
101     const size_t m1_end;
102     const size_t m2_start;
103     const size_t m2_end;
104   } data[] = {
105     { "foo",           "fooey foo",        true,  0, 3, 6, 9 },
106     { "foo foo",       "foo",              true,  0, 3, 0, 0 },
107     { "foo fooey",     "fooey",            true,  0, 5, 0, 0 },
108     { "fooey foo",     "fooey",            true,  0, 5, 0, 0 },
109     { "foo fooey bar", "bar fooey",        true,  0, 3, 4, 9 },
110     { "blah",          "blah",             true,  0, 4, 0, 0 },
111     { "blah",          "foo",              false, 0, 0, 0, 0 },
112     { "blah",          "blahblah",         true,  0, 4, 0, 0 },
113     { "blah",          "foo blah",         true,  4, 8, 0, 0 },
114     { "foo blah",      "blah",             false, 0, 0, 0, 0 },
115     { "foo blah",      "blahx foobar",     true,  0, 4, 6, 9 },
116     { "\"foo blah\"",  "foo blah",         true,  0, 8, 0, 0 },
117     { "\"foo blah\"",  "foox blahx",       false, 0, 0, 0, 0 },
118     { "\"foo blah\"",  "foo blah",         true,  0, 8, 0, 0 },
119     { "\"foo blah\"",  "\"foo blah\"",     true,  1, 9, 0, 0 },
120     { "foo blah",      "\"foo bar blah\"", true,  1, 4, 9, 13 },
121   };
122   for (size_t i = 0; i < arraysize(data); ++i) {
123     QueryParser parser;
124     ScopedVector<QueryNode> query_nodes;
125     parser.ParseQueryNodes(base::UTF8ToUTF16(data[i].query),
126                            MatchingAlgorithm::DEFAULT,
127                            &query_nodes.get());
128     Snippet::MatchPositions match_positions;
129     ASSERT_EQ(data[i].matches,
130               parser.DoesQueryMatch(base::UTF8ToUTF16(data[i].text),
131                                     query_nodes.get(),
132                                     &match_positions));
133     size_t offset = 0;
134     if (data[i].m1_start != 0 || data[i].m1_end != 0) {
135       ASSERT_TRUE(match_positions.size() >= 1);
136       EXPECT_EQ(data[i].m1_start, match_positions[0].first);
137       EXPECT_EQ(data[i].m1_end, match_positions[0].second);
138       offset++;
139     }
140     if (data[i].m2_start != 0 || data[i].m2_end != 0) {
141       ASSERT_TRUE(match_positions.size() == 1 + offset);
142       EXPECT_EQ(data[i].m2_start, match_positions[offset].first);
143       EXPECT_EQ(data[i].m2_end, match_positions[offset].second);
144     }
145   }
146 }
147
148 TEST_F(QueryParserTest, ParseQueryWords) {
149   struct TestData2 {
150     const std::string text;
151     const std::string w1;
152     const std::string w2;
153     const std::string w3;
154     const size_t word_count;
155   } data[] = {
156     { "foo",           "foo", "",    "",  1 },
157     { "foo bar",       "foo", "bar", "",  2 },
158     { "\"foo bar\"",   "foo", "bar", "",  2 },
159     { "\"foo bar\" a", "foo", "bar", "a", 3 },
160   };
161   for (size_t i = 0; i < arraysize(data); ++i) {
162     std::vector<base::string16> results;
163     QueryParser parser;
164     parser.ParseQueryWords(base::UTF8ToUTF16(data[i].text),
165                            MatchingAlgorithm::DEFAULT,
166                            &results);
167     ASSERT_EQ(data[i].word_count, results.size());
168     EXPECT_EQ(data[i].w1, base::UTF16ToUTF8(results[0]));
169     if (results.size() == 2)
170       EXPECT_EQ(data[i].w2, base::UTF16ToUTF8(results[1]));
171     if (results.size() == 3)
172       EXPECT_EQ(data[i].w3, base::UTF16ToUTF8(results[2]));
173   }
174 }
175
176 }  // namespace query_parser