1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Utilities for the SafeBrowsing code.
7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
16 #include "base/basictypes.h"
17 #include "base/strings/string_piece.h"
18 #include "chrome/browser/safe_browsing/chunk_range.h"
24 // A truncated hash's type.
25 typedef uint32 SBPrefix;
27 // Container for holding a chunk URL and the list it belongs to.
30 std::string list_name;
39 inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) {
40 return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash));
43 // Generate full hash for the given string.
44 SBFullHash SBFullHashForString(const base::StringPiece& str);
46 // Container for information about a specific host in an add/sub chunk.
52 // Container for an add/sub chunk.
60 std::deque<SBChunkHost> hosts;
63 // Container for a set of chunks. Interim wrapper to replace use of
64 // |std::deque<SBChunk>| with something having safer memory semantics.
66 // TODO(shess): |SBEntry| is currently a very roundabout way to hold
67 // things pending storage. It could be replaced with the structures
68 // used in SafeBrowsingStore, then lots of bridging code could
75 // Implement that subset of the |std::deque<>| interface which
// Returns true when the list holds no chunks.
77 bool empty() const { return chunks_.empty(); }
// Returns the number of chunks held. Const-qualified, like empty(), so it
// can be called through a const SBChunkList reference.
78 size_t size() const { return chunks_.size(); }
// Appends a copy of |chunk| to the end of the list.
80 void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
// Returns a mutable reference to the last chunk. Forwards to
// std::vector::back(), so the list must not be empty.
81 SBChunk& back() { return chunks_.back(); }
// Returns a mutable reference to the first chunk. Forwards to
// std::vector::front(), so the list must not be empty.
82 SBChunk& front() { return chunks_.front(); }
// Read-only overload of front(); the list must not be empty.
83 const SBChunk& front() const { return chunks_.front(); }
85 typedef std::vector<SBChunk>::const_iterator const_iterator;
// Read-only iterator to the first chunk; equals end() when the list is empty.
86 const_iterator begin() const { return chunks_.begin(); }
// Read-only past-the-end iterator; must not be dereferenced.
87 const_iterator end() const { return chunks_.end(); }
89 typedef std::vector<SBChunk>::iterator iterator;
// Mutable iterator to the first chunk; equals end() when the list is empty.
90 iterator begin() { return chunks_.begin(); }
// Mutable past-the-end iterator; must not be dereferenced.
91 iterator end() { return chunks_.end(); }
// Returns a mutable reference to the chunk at index |n|. Forwards to
// std::vector::operator[], which does no bounds checking: |n| must be
// less than size().
93 SBChunk& operator[](size_t n) { return chunks_[n]; }
// Read-only overload of operator[]; |n| must be less than size() because no
// bounds checking is performed.
94 const SBChunk& operator[](size_t n) const { return chunks_[n]; }
96 // Calls |SBEntry::Destroy()| before clearing |chunks_|.
100 std::vector<SBChunk> chunks_;
102 DISALLOW_COPY_AND_ASSIGN(SBChunkList);
105 // Used when we get a gethash response.
106 struct SBFullHashResult {
108 std::string list_name;
112 // Contains information about a list in the database.
113 struct SBListChunkRanges {
114 explicit SBListChunkRanges(const std::string& n);
116 std::string name; // The list name.
117 std::string adds; // The ranges for add chunks.
118 std::string subs; // The ranges for sub chunks.
121 // Container for deleting chunks from the database.
122 struct SBChunkDelete {
126 std::string list_name;
128 std::vector<ChunkRange> chunk_del;
131 // Different types of threats that SafeBrowsing protects against.
136 // The URL is being used for phishing.
137 SB_THREAT_TYPE_URL_PHISHING,
139 // The URL hosts malware.
140 SB_THREAT_TYPE_URL_MALWARE,
142 // The download URL is malware.
143 SB_THREAT_TYPE_BINARY_MALWARE_URL,
145 // URL detected by the client-side phishing model. Note that unlike the
146 // above values, this does not correspond to a downloaded list.
147 SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL,
149 // The Chrome extension or app (given by its ID) is malware.
150 SB_THREAT_TYPE_EXTENSION,
152 // URL detected by the client-side malware IP list. This IP list is part
153 // of the client side detection model.
154 SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL,
157 // SBEntry ---------------------------------------------------------------------
159 // Holds information about the prefixes for a hostkey. Prefixes can either be
160 // 4 bytes (truncated hash) or 32 bytes (full hash).
162 // [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
164 // [list id ][chunk id (only used if prefix count is 0)][prefix count (0..n)]
165 // [add chunk][prefix][add chunk][prefix]
169 ADD_PREFIX, // 4 byte add entry.
170 SUB_PREFIX, // 4 byte sub entry.
171 ADD_FULL_HASH, // 32 byte add entry.
172 SUB_FULL_HASH, // 32 byte sub entry.
175 // Creates a SBEntry with the necessary size for the given number of prefixes.
176 // Caller owns the object and needs to free it by calling Destroy.
177 static SBEntry* Create(Type type, int prefix_count);
179 // Frees the entry's memory.
182 void set_list_id(int list_id) { data_.list_id = list_id; }
183 int list_id() const { return data_.list_id; }
184 void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
185 int chunk_id() const { return data_.chunk_id; }
186 int prefix_count() const { return data_.prefix_count; }
188 // Returns true if this is a prefix as opposed to a full hash.
189 bool IsPrefix() const {
190 return type() == ADD_PREFIX || type() == SUB_PREFIX;
193 // Returns true if this is an add entry.
195 return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
198 // Returns true if this is a sub entry.
200 return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
203 // Helper to return the size of the prefixes.
204 int HashLen() const {
205 return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
208 // For add entries, returns the add chunk id. For sub entries, returns the
209 // add_chunk id for the prefix at the given index.
210 int ChunkIdAtPrefix(int index) const;
212 // Used for sub chunks to set the chunk id at a given index.
213 void SetChunkIdAtPrefix(int index, int chunk_id);
215 // Return the prefix/full hash at the given index. Caller is expected to
216 // call the right function based on the hash length.
217 const SBPrefix& PrefixAt(int index) const;
218 const SBFullHash& FullHashAt(int index) const;
220 // Set the prefix/full hash at the given index. Caller is expected to
221 // call the right function based on the hash length.
222 void SetPrefixAt(int index, const SBPrefix& prefix);
223 void SetFullHashAt(int index, const SBFullHash& full_hash);
226 // Container for a sub prefix.
232 // Container for a sub full hash.
233 struct SBSubFullHash {
238 // Keep the fixed data together in one struct so that we can get its size
239 // easily. If any of this is modified, the database will have to be cleared.
242 // For adds, this is the add chunk number.
243 // For subs: if prefix_count is 0 then this is the add chunk that this sub
244 // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes
245 // or sub_full_hashes is used for each corresponding prefix.
254 // Helper to return the size of each prefix entry (i.e. for subs this
255 // includes an add chunk id).
256 static int PrefixSize(Type type);
258 // Helper to return how much memory a given Entry would require.
259 static int Size(Type type, int prefix_count);
261 // Returns how many bytes this entry is.
264 Type type() const { return data_.type; }
266 void set_prefix_count(int count) { data_.prefix_count = count; }
267 void set_type(Type type) { data_.type = type; }
269 // The prefixes union must follow the fixed data so that they're contiguous
273 SBPrefix add_prefixes_[1];
274 SBSubPrefix sub_prefixes_[1];
275 SBFullHash add_full_hashes_[1];
276 SBSubFullHash sub_full_hashes_[1];
281 // Utility functions -----------------------------------------------------------
283 namespace safe_browsing_util {
285 // SafeBrowsing list names.
286 extern const char kMalwareList[];
287 extern const char kPhishingList[];
288 // Binary Download list name.
289 extern const char kBinUrlList[];
290 // SafeBrowsing client-side detection whitelist list name.
291 extern const char kCsdWhiteList[];
292 // SafeBrowsing download whitelist list name.
293 extern const char kDownloadWhiteList[];
294 // SafeBrowsing extension list name.
295 extern const char kExtensionBlacklist[];
296 // SafeBrowsing side-effect free whitelist name.
297 extern const char kSideEffectFreeWhitelist[];
298 // SafeBrowsing csd malware IP blacklist name.
299 extern const char kIPBlacklist[];
301 // This array must contain all Safe Browsing lists.
302 extern const char* kAllLists[8];
309 // Obsolete BINHASH = 3,
311 // SafeBrowsing lists are stored in pairs. Keep ListType 5
312 // available for a potential second list that we would store in the
313 // csd-whitelist store file.
314 DOWNLOADWHITELIST = 6,
315 // See above comment. Leave 7 available.
316 EXTENSIONBLACKLIST = 8,
317 // See above comment. Leave 9 available.
318 SIDEEFFECTFREEWHITELIST = 10,
319 // See above comment. Leave 11 available.
321 // See above comment. Leave 13 available.
324 // Maps a list name to ListType.
325 ListType GetListId(const std::string& name);
327 // Maps a ListType to its list name. Returns false on failure.
328 bool GetListName(ListType list_id, std::string* list);
330 // Canonicalizes url as per Google Safe Browsing Specification.
331 // See section 6.1 in
332 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
333 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
334 std::string* canonicalized_path,
335 std::string* canonicalized_query);
337 // Given a URL, returns all the hosts we need to check. They are returned
338 // in order of size (i.e. b.c is first, then a.b.c).
339 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
341 // Given a URL, returns all the paths we need to check.
342 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
344 // Given a URL, returns all the patterns we need to check.
345 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
347 int GetHashIndex(const SBFullHash& hash,
348 const std::vector<SBFullHashResult>& full_hashes);
350 // Given a URL, compare all the possible host + path full hashes to the set of
351 // provided full hashes. Returns the index of the match if one is found, or -1
353 int GetUrlHashIndex(const GURL& url,
354 const std::vector<SBFullHashResult>& full_hashes);
356 bool IsPhishingList(const std::string& list_name);
357 bool IsMalwareList(const std::string& list_name);
358 bool IsBadbinurlList(const std::string& list_name);
359 bool IsExtensionList(const std::string& list_name);
361 GURL GeneratePhishingReportUrl(const std::string& report_page,
362 const std::string& url_to_report,
363 bool is_client_side_detection);
365 SBFullHash StringToSBFullHash(const std::string& hash_in);
366 std::string SBFullHashToString(const SBFullHash& hash_out);
368 } // namespace safe_browsing_util
370 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_