1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
9 #include "base/format_macros.h"
10 #include "base/logging.h"
11 #include "base/strings/string_split.h"
12 #include "base/strings/stringprintf.h"
13 #include "base/sys_byteorder.h"
14 #include "build/build_config.h"
15 #include "chrome/browser/safe_browsing/protocol_parser.h"
16 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
19 // Helper function for quick scans of a line-oriented protocol. Note that we use
20 // std::string::assign(const charT* s, size_type n)
21 // to copy data into 'line'. This form of 'assign' does not call strlen on
22 // 'input', which is binary data and is not NUL-terminated. 'input' may also
23 // contain valid NUL bytes in the payload, where a strlen-based copy would stop.
// Copies a leading run of bytes from the |input_len|-byte buffer |input| into
// |line|. The copy is length-based, so NUL bytes inside the run are preserved.
// NOTE(review): the embedded line numbers jump (27 -> 29, then stop), so the
// condition that selects |pos|, the pointer advance and the return statements
// are not visible here; presumably the scan stops at a '\n' -- confirm.
25 bool GetLine(const char* input, int input_len, std::string* line) {
26 const char* pos = input;
// Never look at bytes at or beyond input + input_len.
27 while (pos && (pos - input < input_len)) {
// Copy the bytes in [input, pos); the byte at |pos| itself is excluded.
29 line->assign(input, pos - input);
38 //------------------------------------------------------------------------------
39 // SafeBrowsingParser implementation
// Default constructor; takes no arguments.
// NOTE(review): the constructor body is not visible in this listing.
41 SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {
// Parses the GetHash response in |chunk_data| and appends the decoded results
// to |full_hashes|. Each record starts with a text line of exactly three
// ':'-separated fields -- list name, add chunk id and the byte length of the
// hash data -- and the hash data is then consumed sizeof(SBFullHash) bytes at
// a time, producing one SBFullHashResult per hash.
// NOTE(review): several statements are not visible in this listing (the
// embedded line numbers skip), including the declaration of |chunk_len|, the
// outer loop and every return statement. The notes below cover visible code
// only.
44 bool SafeBrowsingProtocolParser::ParseGetHash(
45 const char* chunk_data,
47 std::vector<SBFullHashResult>* full_hashes) {
// |length| and |data| form the cursor over the unconsumed part of the input.
49 int length = chunk_len;
50 const char* data = chunk_data;
// Read the next header line from the unconsumed input.
55 if (!GetLine(data, length, &line))
// Size of the header line plus one more byte (presumably its '\n') -- the
// statements that apply |offset| to the cursor are not visible here.
58 offset = static_cast<int>(line.size()) + 1;
// Header fields: <list name>:<add chunk id>:<hash data length in bytes>.
62 std::vector<std::string> cmd_parts;
63 base::SplitString(line, ':', &cmd_parts);
64 if (cmd_parts.size() != 3)
67 SBFullHashResult full_hash;
68 full_hash.list_name = cmd_parts[0];
// atoi() reports no errors: non-numeric text yields 0 and a leading '-' yields
// a negative value.
69 full_hash.add_chunk_id = atoi(cmd_parts[1].c_str());
70 int full_hash_len = atoi(cmd_parts[2].c_str());
72 // Ignore hash results from lists we don't recognize.
// The hash bytes of an unrecognized list are stepped over without being copied.
73 if (safe_browsing_util::GetListId(full_hash.list_name) < 0) {
74 data += full_hash_len;
75 length -= full_hash_len;
// Emit one result per sizeof(SBFullHash) bytes; every result of a record
// shares the record's list name and add chunk id.
79 while (full_hash_len > 0) {
// NOTE(review): this DCHECK is the only visible guard before the memcpy, and
// no visible line compares |full_hash_len| with |length|. If the elided lines
// add no such check, a truncated or malformed response could make the memcpy
// read past the end of the input -- confirm.
80 DCHECK(static_cast<size_t>(full_hash_len) >= sizeof(SBFullHash));
81 memcpy(&full_hash.hash, data, sizeof(SBFullHash));
82 full_hashes->push_back(full_hash);
// Advance the cursor and the per-record byte budget by one full hash.
83 data += sizeof(SBFullHash);
84 length -= sizeof(SBFullHash);
85 full_hash_len -= sizeof(SBFullHash);
// Appends a GetHash request for |prefixes| to |request|: first a text header
// of two ':'-separated unsigned numbers terminated by '\n', then data taken
// from each prefix in vector order.
// NOTE(review): the first header argument (embedded line 98) and the length
// argument of the per-prefix append (embedded line 102) are not visible in
// this listing; presumably they are sizeof(SBPrefix) in both places -- confirm.
92 void SafeBrowsingProtocolParser::FormatGetHash(
93 const std::vector<SBPrefix>& prefixes, std::string* request) {
96 // Format the request for GetHash.
// The second header number is the combined size in bytes of all prefixes.
97 request->append(base::StringPrintf("%" PRIuS ":%" PRIuS "\n",
99 sizeof(SBPrefix) * prefixes.size()));
100 for (size_t i = 0; i < prefixes.size(); ++i) {
// The prefix is handed over as a char pointer to its in-memory representation,
// i.e. in host byte order (no byte swap is visible here).
101 request->append(reinterpret_cast<const char*>(&prefixes[i]),
// Parses the update response in |chunk_data| one text line at a time. Each
// line is split on ':' and dispatched on the first character of its first
// field. The visible handlers write |*next_update_sec|, append to |deletes|
// and append to |chunk_urls|.
// NOTE(review): the case labels, some parameters (including |chunk_len|) and
// most return statements are not visible in this listing (the embedded line
// numbers skip), so the mapping from command letter to handler is taken from
// the existing comments only.
106 bool SafeBrowsingProtocolParser::ParseUpdate(
107 const char* chunk_data,
109 int* next_update_sec,
111 std::vector<SBChunkDelete>* deletes,
112 std::vector<ChunkUrl>* chunk_urls) {
113 DCHECK(next_update_sec);
// |length| and |data| form the cursor over the unconsumed part of the input.
117 int length = chunk_len;
118 const char* data = chunk_data;
// Most recently parsed list name; it is stamped onto every delete and chunk
// URL recorded after it.
121 std::string list_name;
124 std::string cmd_line;
125 if (!GetLine(data, length, &cmd_line))
126 return false; // Error: bad list format!
128 std::vector<std::string> cmd_parts;
129 base::SplitString(cmd_line, ':', &cmd_parts);
130 if (cmd_parts.empty())
132 const std::string& command = cmd_parts[0];
// Every line must have exactly two fields, except lines whose command starts
// with 'u' (presumably because a URL can itself contain ':').
133 if (cmd_parts.size() != 2 && command[0] != 'u')
// Bytes used by this line: its text plus one more byte (presumably the '\n').
136 const int consumed = static_cast<int>(cmd_line.size()) + 1;
140 return false; // Parsing error.
142 // Differentiate on the first character of the command (which is usually
143 // only one character, with the exception of the 'ad' and 'sd' commands).
144 switch (command[0]) {
147 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
148 // have also parsed the list name before getting here, or the add-del
149 // or sub-del will have no context.
150 if (command.size() != 2 || command[1] != 'd' || list_name.empty())
152 SBChunkDelete chunk_delete;
// 's' marks a sub-del; any other first character reaching this point is
// recorded as an add-del.
153 chunk_delete.is_sub_del = command[0] == 's';
// Convert the text after the ':' into chunk ranges. Any result value of
// StringToRanges is not inspected here.
154 StringToRanges(cmd_parts[1], &chunk_delete.chunk_del);
155 chunk_delete.list_name = list_name;
156 deletes->push_back(chunk_delete);
161 // The line providing the name of the list (i.e. 'goog-phish-shavar').
162 list_name = cmd_parts[1];
166 // The line providing the next earliest time (in seconds) to re-query.
// atoi() reports no errors; non-numeric text yields 0.
167 *next_update_sec = atoi(cmd_parts[1].c_str());
// The URL is cut from the raw line rather than from |cmd_parts|, so any ':'
// inside the URL is kept.
// NOTE(review): a line consisting of just "u" passes the field-count check
// above, and substr(2) on a one-character string is out of range -- confirm
// that the elided lines guard against this.
172 chunk_url.url = cmd_line.substr(2); // Skip the initial "u:".
173 chunk_url.list_name = list_name;
174 chunk_urls->push_back(chunk_url);
// Compares the argument with the literal "pleasereset"; what happens on a
// match or a mismatch is not visible in this listing.
179 if (cmd_parts[1] != "pleasereset")
185 // According to the spec, we ignore commands we don't understand.
// Parses a buffer of chunks for the list |list_name| and appends one SBChunk
// per chunk to |chunks|. Each chunk is a header line with four ':'-separated
// fields, <"a" or "s">:<chunk number>:<hash length>:<payload length>, followed
// by a payload of that many bytes, which is handed to ParseAddChunk ("a") or
// ParseSubChunk ("s").
// NOTE(review): the |data| and |length| parameters and several statements are
// not visible in this listing (the embedded line numbers skip).
193 bool SafeBrowsingProtocolParser::ParseChunk(const std::string& list_name,
196 SBChunkList* chunks) {
// |remaining| and |chunk_data| form the cursor over the unconsumed input.
197 int remaining = length;
198 const char* chunk_data = data;
200 while (remaining > 0) {
201 std::string cmd_line;
202 if (!GetLine(chunk_data, remaining, &cmd_line))
203 return false; // Error: bad chunk format!
// Step over the header line plus one more byte (presumably its '\n').
205 const int line_len = static_cast<int>(cmd_line.length()) + 1;
206 chunk_data += line_len;
207 remaining -= line_len;
208 std::vector<std::string> cmd_parts;
209 base::SplitString(cmd_line, ':', &cmd_parts);
210 if (cmd_parts.size() != 4) {
214 // Process the chunk data.
215 const int chunk_number = atoi(cmd_parts[1].c_str());
216 const int hash_len = atoi(cmd_parts[2].c_str());
// Only two hash widths are accepted: a prefix or a full hash.
217 if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) {
218 VLOG(1) << "ParseChunk got unknown hashlen " << hash_len;
// NOTE(review): atoi() yields a negative value for text such as "-5". The
// check below only rejects a payload longer than the remaining input, so a
// negative |chunk_len| passes it unless an elided line rejects it -- confirm.
222 const int chunk_len = atoi(cmd_parts[3].c_str());
224 if (remaining < chunk_len)
225 return false; // parse error.
// The chunk is appended before its payload is parsed and then filled in place
// through chunks->back(). No visible line removes it again when the payload
// parse fails.
227 chunks->push_back(SBChunk());
228 chunks->back().chunk_number = chunk_number;
230 if (cmd_parts[0] == "a") {
231 chunks->back().is_add = true;
232 if (!ParseAddChunk(list_name, chunk_data, chunk_len, hash_len,
233 &chunks->back().hosts))
234 return false; // Parse error.
235 } else if (cmd_parts[0] == "s") {
236 chunks->back().is_add = false;
237 if (!ParseSubChunk(list_name, chunk_data, chunk_len, hash_len,
238 &chunks->back().hosts))
239 return false; // Parse error.
// Step over the payload that was just handled.
245 chunk_data += chunk_len;
246 remaining -= chunk_len;
247 DCHECK_LE(0, remaining);
// Debug-only expectation that the input has been consumed exactly.
250 DCHECK(remaining == 0);
// Parses the payload of one add chunk into |hosts|. For the lists named in the
// test below the payload is a packed array of hashes; otherwise it is a
// sequence of records, each a host key and a prefix count followed by that
// many hashes.
// NOTE(review): the |data|, |data_len| and |hash_len| parameters, the
// declarations of |host| and |prefix_count|, and most return statements are
// not visible in this listing (the embedded line numbers skip).
255 bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string& list_name,
259 std::deque<SBChunkHost>* hosts) {
260 const char* chunk_data = data;
261 int remaining = data_len;
// A hash length equal to sizeof(SBPrefix) selects prefix entries; any other
// value selects full-hash entries.
263 SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
264 SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH;
266 if (list_name == safe_browsing_util::kBinHashList ||
267 list_name == safe_browsing_util::kDownloadWhiteList ||
268 list_name == safe_browsing_util::kExtensionBlacklist ||
269 list_name == safe_browsing_util::kIPBlacklist) {
270 // These lists only contain prefixes, no HOSTKEY and COUNT.
// The payload is expected to be a whole number of |hash_len|-byte items; this
// is enforced by a DCHECK only.
271 DCHECK_EQ(0, remaining % hash_len);
272 prefix_count = remaining / hash_len;
273 SBChunkHost chunk_host;
275 chunk_host.entry = SBEntry::Create(type, prefix_count);
// |chunk_host| is copied into |hosts| before its prefixes are read; the read
// below fills chunk_host.entry. NOTE(review): presumably |entry| is a pointer
// that the stored copy shares -- confirm.
276 hosts->push_back(chunk_host);
277 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
279 DCHECK_GE(remaining, 0);
// Smallest possible record: one host key plus the one-byte prefix count.
282 const int min_size = sizeof(SBPrefix) + 1;
283 while (remaining >= min_size) {
284 if (!ReadHostAndPrefixCount(&chunk_data, &remaining,
285 &host, &prefix_count)) {
288 DCHECK_GE(remaining, 0);
289 SBChunkHost chunk_host;
290 chunk_host.host = host;
291 chunk_host.entry = SBEntry::Create(type, prefix_count);
// As above, the host is stored first and its entry is filled afterwards.
292 hosts->push_back(chunk_host);
293 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
296 DCHECK_GE(remaining, 0);
// True only when the payload was consumed exactly, with no bytes left over.
299 return remaining == 0;
// Parses the payload of one sub chunk into |hosts|. For the lists named in the
// test below the payload is a packed array of (add chunk number, hash) pairs;
// otherwise it is a sequence of records, each a host key and a prefix count
// followed either by a single add chunk number (count of zero) or by that many
// prefixes read through ReadPrefixes.
// NOTE(review): the |data|, |data_len| and |hash_len| parameters, the
// declarations of |host|, |prefix_count| and |chunk_id|, and most return
// statements are not visible in this listing (the embedded line numbers skip).
302 bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string& list_name,
306 std::deque<SBChunkHost>* hosts) {
307 int remaining = data_len;
308 const char* chunk_data = data;
// A hash length equal to sizeof(SBPrefix) selects prefix entries; any other
// value selects full-hash entries.
310 SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
311 SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH;
// NOTE(review): the matching test in ParseAddChunk also lists kIPBlacklist,
// which is absent here -- confirm that the difference is intentional.
313 if (list_name == safe_browsing_util::kBinHashList ||
314 list_name == safe_browsing_util::kDownloadWhiteList ||
315 list_name == safe_browsing_util::kExtensionBlacklist) {
316 SBChunkHost chunk_host;
317 // Set host to 0 and it won't be used for kBinHashList.
319 // kBinHashList only contains (add_chunk_number, prefix) pairs, no HOSTKEY
320 // and COUNT. |add_chunk_number| is int32.
// Integer division: a trailing partial pair is not counted.
321 prefix_count = remaining / (sizeof(int32) + hash_len);
322 chunk_host.entry = SBEntry::Create(type, prefix_count);
323 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
325 DCHECK_GE(remaining, 0);
// In this branch the host is stored after its prefixes have been read.
326 hosts->push_back(chunk_host);
// Smallest possible record: host key, one count byte and one more
// sizeof(SBPrefix) bytes (presumably the 4-byte add chunk number -- confirm).
329 const int min_size = 2 * sizeof(SBPrefix) + 1;
330 while (remaining >= min_size) {
331 if (!ReadHostAndPrefixCount(&chunk_data, &remaining,
332 &host, &prefix_count)) {
335 DCHECK_GE(remaining, 0);
336 SBChunkHost chunk_host;
337 chunk_host.host = host;
338 chunk_host.entry = SBEntry::Create(type, prefix_count);
// In this branch the host is stored before its entry is filled in below.
339 hosts->push_back(chunk_host);
340 if (prefix_count == 0) {
341 // There is only an add chunk number (no prefixes).
343 if (!ReadChunkId(&chunk_data, &remaining, &chunk_id))
345 DCHECK_GE(remaining, 0);
346 chunk_host.entry->set_chunk_id(chunk_id);
349 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
352 DCHECK_GE(remaining, 0);
// True only when the payload was consumed exactly, with no bytes left over.
355 return remaining == 0;
// Reads a host key and a one-byte prefix count from the cursor
// (|*data|, |*remaining|) into |*host| and |*count|, advancing the cursor.
// NOTE(review): the return statements and the cursor advance for the count
// byte are not visible in this listing (the embedded line numbers skip).
358 bool SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
359 const char** data, int* remaining, SBPrefix* host, int* count) {
// Need a full host key plus the count byte. A negative |*remaining| would
// convert to a very large size_t and pass this test, so callers must keep it
// non-negative.
360 if (static_cast<size_t>(*remaining) < sizeof(SBPrefix) + 1)
362 // Next 4 bytes are the host prefix.
// Copied byte for byte; no byte-order conversion is applied here.
363 memcpy(host, *data, sizeof(SBPrefix));
364 *data += sizeof(SBPrefix);
365 *remaining -= sizeof(SBPrefix);
367 // Next 1 byte is the prefix count (could be zero, but never negative).
// Going through unsigned char maps byte values 0x80 and above to 128..255
// instead of negative numbers.
368 *count = static_cast<unsigned char>(**data);
371 DCHECK_GE(*remaining, 0);
// Reads one 4-byte chunk id from the cursor (|*data|, |*remaining|) into
// |*chunk_id|, advancing the cursor by four bytes.
// NOTE(review): the return statements are not visible in this listing (the
// embedded line numbers skip).
375 bool SafeBrowsingProtocolParser::ReadChunkId(
376 const char** data, int* remaining, int* chunk_id) {
377 // Protocol says four bytes, not sizeof(int). Make sure those
378 // values are the same.
379 DCHECK_EQ(sizeof(*chunk_id), 4u);
// A negative |*remaining| would convert to a very large size_t and pass this
// test, so callers must keep it non-negative.
380 if (static_cast<size_t>(*remaining) < sizeof(*chunk_id))
382 memcpy(chunk_id, *data, sizeof(*chunk_id));
383 *data += sizeof(*chunk_id);
384 *remaining -= sizeof(*chunk_id);
// NOTE(review): the value was just read from the input, so a net-to-host
// conversion would name the intent better; presumably HostToNet32 performs the
// same byte swap in both directions -- confirm.
385 *chunk_id = base::HostToNet32(*chunk_id);
386 DCHECK_GE(*remaining, 0);
// Reads |count| items from the cursor (|*data|, |*remaining|) into |entry|.
// For a sub entry each item starts with a chunk id read through ReadChunkId;
// every item then carries one hash of entry->HashLen() bytes, stored as a
// prefix or as a full hash depending on entry->IsPrefix().
// NOTE(review): the local declarations, the advance of |*data| past each hash,
// the return statements and the end of the function are not visible in this
// listing (the embedded line numbers skip and the listing ends inside the
// loop).
390 bool SafeBrowsingProtocolParser::ReadPrefixes(
391 const char** data, int* remaining, SBEntry* entry, int count) {
392 int hash_len = entry->HashLen();
393 for (int i = 0; i < count; ++i) {
// Sub entries carry a chunk id in front of each hash.
394 if (entry->IsSub()) {
396 if (!ReadChunkId(data, remaining, &chunk_id))
398 DCHECK_GE(*remaining, 0);
399 entry->SetChunkIdAtPrefix(i, chunk_id);
// Make sure a whole hash is still available before copying it.
402 if (*remaining < hash_len)
404 if (entry->IsPrefix()) {
// The hash length reported by the entry must match the size of the local
// being filled; this is checked by a DCHECK only.
406 DCHECK_EQ(hash_len, (int)sizeof(prefix));
407 memcpy(&prefix, *data, sizeof(prefix));
408 entry->SetPrefixAt(i, prefix);
411 DCHECK_EQ(hash_len, (int)sizeof(hash));
412 memcpy(&hash, *data, sizeof(hash));
413 entry->SetFullHashAt(i, hash);
// Account for the hash bytes that were just consumed.
416 *remaining -= hash_len;
417 DCHECK_GE(*remaining, 0);