1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/basictypes.h"
6 #include "base/memory/scoped_vector.h"
7 #include "base/strings/utf_string_conversions.h"
8 #include "chrome/browser/history/query_parser.h"
9 #include "testing/gtest/include/gtest/gtest.h"
// Fixture shared by every TEST_F case in this file. It declares the
// QueryToString() helper and holds the QueryParser instance that the helper
// and the NumWords test call into.
// NOTE(review): the embedded line numbers skip inside this class, so some
// members and the closing brace are not visible in this extract.
11 class QueryParserTest : public testing::Test {
// Expected word count for one test input. The NumWords test compares a field
// of this name against the value returned by QueryParser::ParseQuery().
15 const int expected_word_count;
// Runs |query| through query_parser_ and returns the generated query string
// as UTF-8 (defined after the class).
18 std::string QueryToString(const std::string& query);
// Parser instance used by QueryToString() and by the NumWords test.
21 QueryParser query_parser_;
24 // Test helper: Convert a user query string in 8-bit (for hardcoding
25 // convenience) to a SQLite query string.
// |query| is converted with base::UTF8ToUTF16() before being handed to
// ParseQuery(); the string passed to ParseQuery() by address is converted
// back with base::UTF16ToUTF8() and returned. ParseQuery()'s own return value
// is ignored here.
26 std::string QueryParserTest::QueryToString(const std::string& query) {
// Passed by address to ParseQuery(), which is expected to fill it in.
27 base::string16 sqlite_query;
28 query_parser_.ParseQuery(base::UTF8ToUTF16(query), &sqlite_query);
29 return base::UTF16ToUTF8(sqlite_query);
32 // Basic multi-word queries, including prefix matching.
// Each expectation compares QueryToString(input) with the query string the
// parser should produce. A trailing '*' in the expected text marks a word
// that is treated as a prefix match.
33 TEST_F(QueryParserTest, SimpleQueries) {
// A whitespace-only query is expected to produce an empty string.
34 EXPECT_EQ("", QueryToString(" "));
35 EXPECT_EQ("singleword*", QueryToString("singleword"));
// Leading and trailing spaces are not expected to appear in the output.
36 EXPECT_EQ("spacedout*", QueryToString(" spacedout "));
37 EXPECT_EQ("foo* bar*", QueryToString("foo bar"));
38 // Short words aren't prefix matches. For Korean Hangul
39 // the minimum is 2 while for other scripts, it's 3.
40 EXPECT_EQ("f b", QueryToString(" f b"));
// U+AC00 and U+C7A5 given as two separate one-character words: neither is
// expected to get a '*'.
42 EXPECT_EQ(base::WideToUTF8(L"\xAC00 \xC7A5"),
43 QueryToString(base::WideToUTF8(L" \xAC00 \xC7A5")));
44 EXPECT_EQ("foo* bar*", QueryToString(" foo bar "));
// Two two-character Hangul words: both are expected to get a '*'.
46 EXPECT_EQ(base::WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"),
47 QueryToString(base::WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0")));
50 // Quoted substring parsing.
// Quoted text is expected back inside double quotes and without the '*'
// suffix that the unquoted words in this file's other tests receive.
51 TEST_F(QueryParserTest, Quoted) {
// A balanced quoted word is expected back unchanged.
53 EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\""));
// Opening quote with no closing one: the expected output has the closing
// quote added.
55 EXPECT_EQ("\"miss end\"", QueryToString("\"miss end"));
56 // Missing begin quotes
// The lone trailing quote is expected to be dropped, leaving two ordinary
// prefix terms.
57 EXPECT_EQ("miss* beg*", QueryToString("miss beg\""));
// An unbalanced run of quotes is expected to yield two separately quoted
// words.
59 EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many \"\"quotes"));
62 // Apostrophes within words should be preserved, but otherwise stripped.
63 TEST_F(QueryParserTest, Apostrophes) {
// Apostrophe inside a word (before a trailing "s"): expected to be kept.
64 EXPECT_EQ("foo* bar's*", QueryToString("foo bar's"));
// Apostrophe after a single leading letter: expected to be kept.
65 EXPECT_EQ("l'foo*", QueryToString("l'foo"));
// Apostrophe at the very start of a word: expected to be removed.
66 EXPECT_EQ("foo*", QueryToString("'foo"));
69 // Special characters.
// The punctuation runs around and between the words are expected to be
// dropped, leaving three prefix terms separated by single spaces.
70 TEST_F(QueryParserTest, SpecialChars) {
71 EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar"));
// Checks the value returned by QueryParser::ParseQuery() against the expected
// word count of each table row.
// NOTE(review): the table's declaration, some rows and the tail of the
// ParseQuery() call are not visible in this extract.
74 TEST_F(QueryParserTest, NumWords) {
// Visible rows pair a query string with an expected count. Judging by these
// counts, the words inside a quoted phrase are counted individually
// ("bar baz" contributes 2).
77 { "foo \"bar baz\"", 3 },
79 { "foo \"bar baz\" blah", 4 },
// arraysize() is not defined in this file; presumably it yields the number of
// elements in |data| -- confirm against the base headers.
82 for (size_t i = 0; i < arraysize(data); ++i) {
// Presumably the output argument of the ParseQuery() call below (the rest of
// that call is not visible); only the call's return value is compared.
83 base::string16 query_string;
84 EXPECT_EQ(data[i].expected_word_count,
85 query_parser_.ParseQuery(base::UTF8ToUTF16(data[i].input),
// Table-driven test: for each row, parses the row's query into query nodes,
// asks the parser whether those nodes match the row's text, and checks the
// reported match positions.
// NOTE(review): several lines of this test (three field declarations, the
// declarations of |parser| and |offset|, and the tails of two calls) are not
// visible in this extract.
90 TEST_F(QueryParserTest, ParseQueryNodesAndMatch) {
// Query string handed to ParseQueryNodes().
92 const std::string query;
// Text the parsed query is matched against.
93 const std::string text;
// Expected .first of match_positions[0].
95 const size_t m1_start;
// Expected .first of match_positions[offset].
97 const size_t m2_start;
// Rows appear to be laid out as
//   { query, text, matches, m1_start, m1_end, m2_start, m2_end }
// (field order inferred from the visible declarations and the checks below).
// A start/end pair of 0, 0 means that position is not checked.
100 { "foo", "fooey foo", true, 0, 3, 6, 9 },
101 { "foo foo", "foo", true, 0, 3, 0, 0 },
102 { "foo fooey", "fooey", true, 0, 5, 0, 0 },
103 { "fooey foo", "fooey", true, 0, 5, 0, 0 },
104 { "foo fooey bar", "bar fooey", true, 0, 3, 4, 9 },
105 { "blah", "blah", true, 0, 4, 0, 0 },
106 { "blah", "foo", false, 0, 0, 0, 0 },
107 { "blah", "blahblah", true, 0, 4, 0, 0 },
108 { "blah", "foo blah", true, 4, 8, 0, 0 },
109 { "foo blah", "blah", false, 0, 0, 0, 0 },
110 { "foo blah", "blahx foobar", true, 0, 4, 6, 9 },
111 { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 },
112 { "\"foo blah\"", "foox blahx", false, 0, 0, 0, 0 },
113 { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 },
114 { "\"foo blah\"", "\"foo blah\"", true, 1, 9, 0, 0 },
115 { "foo blah", "\"foo bar blah\"", true, 1, 4, 9, 13 },
117 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
// Receives the nodes produced by ParseQueryNodes() for this row's query.
119 ScopedVector<QueryNode> query_nodes;
120 parser.ParseQueryNodes(base::UTF8ToUTF16(data[i].query),
// Indexed below as a sequence of pairs, compared via .first and .second
// against the row's expected start and end values.
122 Snippet::MatchPositions match_positions;
// ASSERT (not EXPECT): a wrong match verdict aborts the test before the
// position checks run.
123 ASSERT_EQ(data[i].matches,
124 parser.DoesQueryMatch(base::UTF8ToUTF16(data[i].text),
// First position is only checked when the row specifies a non-zero m1 value.
128 if (data[i].m1_start != 0 || data[i].m1_end != 0) {
// Guard the indexing below against an empty result.
129 ASSERT_TRUE(match_positions.size() >= 1);
130 EXPECT_EQ(data[i].m1_start, match_positions[0].first);
131 EXPECT_EQ(data[i].m1_end, match_positions[0].second);
// Second position is only checked when the row specifies a non-zero m2 value.
134 if (data[i].m2_start != 0 || data[i].m2_end != 0) {
// |offset| is declared and updated on lines not visible here; presumably it
// is 1 when the first position was checked above -- confirm.
135 ASSERT_TRUE(match_positions.size() == 1 + offset);
136 EXPECT_EQ(data[i].m2_start, match_positions[offset].first);
137 EXPECT_EQ(data[i].m2_end, match_positions[offset].second);
// Table-driven test: splits each row's text into words with ParseQueryWords()
// and checks the word count and the individual words.
// NOTE(review): the table's declaration, the declaration of |parser| and the
// closing braces are not visible in this extract.
142 TEST_F(QueryParserTest, ParseQueryWords) {
// Query text passed to ParseQueryWords().
144 const std::string text;
// Expected words, in order; "" in the rows below for positions beyond
// word_count.
145 const std::string w1;
146 const std::string w2;
147 const std::string w3;
// Expected results.size().
148 const size_t word_count;
// Rows are { text, w1, w2, w3, word_count }. The quoted rows expect the words
// of the phrase as separate entries, without the quotes.
150 { "foo", "foo", "", "", 1 },
151 { "foo bar", "foo", "bar", "", 2 },
152 { "\"foo bar\"", "foo", "bar", "", 2 },
153 { "\"foo bar\" a", "foo", "bar", "a", 3 },
155 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
156 std::vector<base::string16> results;
158 parser.ParseQueryWords(base::UTF8ToUTF16(data[i].text), &results);
// ASSERT (not EXPECT) so the indexing below is not reached when the count is
// wrong. Every visible row has word_count >= 1, so results[0] exists once
// this passes.
159 ASSERT_EQ(data[i].word_count, results.size());
160 EXPECT_EQ(data[i].w1, base::UTF16ToUTF8(results[0]));
// NOTE(review): the two guards below use '==', so for the three-word row
// results.size() is 3 and w2 is never compared; only w1 and w3 are checked.
// '>= 2' / '>= 3' would cover every expected word.
161 if (results.size() == 2)
162 EXPECT_EQ(data[i].w2, base::UTF16ToUTF8(results[1]));
163 if (results.size() == 3)
164 EXPECT_EQ(data[i].w3, base::UTF16ToUTF8(results[2]));