Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / net / filter / sdch_filter.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/filter/sdch_filter.h"
6
7 #include <ctype.h>
8 #include <limits.h>
9
10 #include <algorithm>
11
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "net/base/sdch_manager.h"
15 #include "net/url_request/url_request_context.h"
16
17 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
18
19 namespace net {
20
21 SdchFilter::SdchFilter(const FilterContext& filter_context)
22     : filter_context_(filter_context),
23       decoding_status_(DECODING_UNINITIALIZED),
24       dictionary_hash_(),
25       dictionary_hash_is_plausible_(false),
26       dictionary_(NULL),
27       url_request_context_(filter_context.GetURLRequestContext()),
28       dest_buffer_excess_(),
29       dest_buffer_excess_index_(0),
30       source_bytes_(0),
31       output_bytes_(0),
32       possible_pass_through_(false) {
33   bool success = filter_context.GetMimeType(&mime_type_);
34   DCHECK(success);
35   success = filter_context.GetURL(&url_);
36   DCHECK(success);
37   DCHECK(url_request_context_->sdch_manager());
38 }
39
40 SdchFilter::~SdchFilter() {
41   // All code here is for gathering stats, and can be removed when SDCH is
42   // considered stable.
43
44   static int filter_use_count = 0;
45   ++filter_use_count;
46   if (META_REFRESH_RECOVERY == decoding_status_) {
47     UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
48   }
49
50   if (vcdiff_streaming_decoder_.get()) {
51     if (!vcdiff_streaming_decoder_->FinishDecoding()) {
52       decoding_status_ = DECODING_ERROR;
53       SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
54       // Make it possible for the user to hit reload, and get non-sdch content.
55       // Note this will "wear off" quickly enough, and is just meant to assure
56       // in some rare case that the user is not stuck.
57       url_request_context_->sdch_manager()->BlacklistDomain(
58           url_, SdchManager::INCOMPLETE_SDCH_CONTENT);
59       UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
60            static_cast<int>(filter_context_.GetByteReadCount()));
61       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
62       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
63     }
64   }
65
66   if (!dest_buffer_excess_.empty()) {
67     // Filter chaining error, or premature teardown.
68     SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
69     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
70          static_cast<int>(filter_context_.GetByteReadCount()));
71     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
72                          dest_buffer_excess_.size());
73     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
74     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
75   }
76
77   if (filter_context_.IsCachedContent()) {
78     // Not a real error, but it is useful to have this tally.
79     // TODO(jar): Remove this stat after SDCH stability is validated.
80     SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
81     return;  // We don't need timing stats, and we aready got ratios.
82   }
83
84   switch (decoding_status_) {
85     case DECODING_IN_PROGRESS: {
86       if (output_bytes_)
87         UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
88             static_cast<int>(
89                 (filter_context_.GetByteReadCount() * 100) / output_bytes_));
90       UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
91                            output_bytes_);
92       filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);
93
94       // Allow latency experiments to proceed.
95       url_request_context_->sdch_manager()->SetAllowLatencyExperiment(
96           url_, true);
97       return;
98     }
99     case PASS_THROUGH: {
100       filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
101       return;
102     }
103     case DECODING_UNINITIALIZED: {
104       SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
105       return;
106     }
107     case WAITING_FOR_DICTIONARY_SELECTION: {
108       SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
109       return;
110     }
111     case DECODING_ERROR: {
112       SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
113       return;
114     }
115     case META_REFRESH_RECOVERY: {
116       // Already accounted for when set.
117       return;
118     }
119   }  // end of switch.
120 }
121
122 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
123   if (decoding_status_ != DECODING_UNINITIALIZED)
124     return false;
125
126   // Handle case  where sdch filter is guessed, but not required.
127   if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
128     possible_pass_through_ = true;
129
130   // Initialize decoder only after we have a dictionary in hand.
131   decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
132   return true;
133 }
134
// HTML that ReadFilteredData() emits in place of the response body when it
// enters META_REFRESH_RECOVERY: a zero-delay meta refresh that makes the page
// reload itself.  Builds without NDEBUG also include a visible banner.
//
// Declared as a const array rather than a `const char*` so that the symbol
// itself cannot be re-pointed.
//
// NOTE(review): `foreground-color` in the banner style is not a standard CSS
// property (`color` is).  It is left untouched here so the emitted bytes stay
// exactly as before.
#ifndef NDEBUG
static const char kDecompressionErrorHtml[] =
  "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
  "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
  "border-color:black;border-style:solid;text-align:left;font-family:arial;"
  "font-size:10pt;foreground-color:black;background-color:white\">"
  "An error occurred. This page will be reloaded shortly. "
  "Or press the \"reload\" button now to reload it immediately."
  "</div>";
#else
static const char kDecompressionErrorHtml[] =
  "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
#endif
148
149 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
150                                                   int* dest_len) {
151   int available_space = *dest_len;
152   *dest_len = 0;  // Nothing output yet.
153
154   if (!dest_buffer || available_space <= 0)
155     return FILTER_ERROR;
156
157   if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
158     FilterStatus status = InitializeDictionary();
159     if (FILTER_NEED_MORE_DATA == status)
160       return FILTER_NEED_MORE_DATA;
161     if (FILTER_ERROR == status) {
162       DCHECK_EQ(DECODING_ERROR, decoding_status_);
163       DCHECK_EQ(0u, dest_buffer_excess_index_);
164       DCHECK(dest_buffer_excess_.empty());
165       // This is where we try very hard to do error recovery, and make this
166       // protocol robust in the face of proxies that do many different things.
167       // If we decide that things are looking very bad (too hard to recover),
168       // we may even issue a "meta-refresh" to reload the page without an SDCH
169       // advertisement (so that we are sure we're not hurting anything).
170       //
171       // Watch out for an error page inserted by the proxy as part of a 40x
172       // error response.  When we see such content molestation, we certainly
173       // need to fall into the meta-refresh case.
174       if (filter_context_.GetResponseCode() == 404) {
175         // We could be more generous, but for now, only a "NOT FOUND" code will
176         // cause a pass through.  All other bad codes will fall into a
177         // meta-refresh.
178         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
179         decoding_status_ = PASS_THROUGH;
180       } else if (filter_context_.GetResponseCode() != 200) {
181         // We need to meta-refresh, with SDCH disabled.
182       } else if (filter_context_.IsCachedContent()
183                  && !dictionary_hash_is_plausible_) {
184         // We must have hit the back button, and gotten content that was fetched
185         // before we *really* advertised SDCH and a dictionary.
186         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
187         decoding_status_ = PASS_THROUGH;
188       } else if (possible_pass_through_) {
189         // This is the potentially most graceful response. There really was no
190         // error. We were just overly cautious when we added a TENTATIVE_SDCH.
191         // We added the sdch coding tag, and it should not have been added.
192         // This can happen in server experiments, where the server decides
193         // not to use sdch, even though there is a dictionary.  To be
194         // conservative, we locally added the tentative sdch (fearing that a
195         // proxy stripped it!) and we must now recant (pass through).
196         SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH);
197         // However.... just to be sure we don't get burned by proxies that
198         // re-compress with gzip or other system, we can sniff to see if this
199         // is compressed data etc.  For now, we do nothing, which gets us into
200         // the meta-refresh result.
201         // TODO(jar): Improve robustness by sniffing for valid text that we can
202         // actual use re: decoding_status_ = PASS_THROUGH;
203       } else if (dictionary_hash_is_plausible_) {
204         // We need a meta-refresh since we don't have the dictionary.
205         // The common cause is a restart of the browser, where we try to render
206         // cached content that was saved when we had a dictionary.
207       } else if (filter_context_.IsSdchResponse()) {
208         // This is a very corrupt SDCH request response.  We can't decode it.
209         // We'll use a meta-refresh, and get content without asking for SDCH.
210         // This will also progressively disable SDCH for this domain.
211       } else {
212         // One of the first 9 bytes precluded consideration as a hash.
213         // This can't be an SDCH payload, even though the server said it was.
214         // This is a major error, as the server or proxy tagged this SDCH even
215         // though it is not!
216         // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
217         // Worse yet, meta-refresh could lead to an infinite refresh loop.
218         SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
219         decoding_status_ = PASS_THROUGH;
220         // ... but further back-off on advertising SDCH support.
221         url_request_context_->sdch_manager()->BlacklistDomain(
222             url_, SdchManager::PASSING_THROUGH_NON_SDCH);
223       }
224
225       if (decoding_status_ == PASS_THROUGH) {
226         dest_buffer_excess_ = dictionary_hash_;  // Send what we scanned.
227       } else {
228         // This is where we try to do the expensive meta-refresh.
229         if (std::string::npos == mime_type_.find("text/html")) {
230           // Since we can't do a meta-refresh (along with an exponential
231           // backoff), we'll just make sure this NEVER happens again.
232           SdchManager::ProblemCodes problem =
233               (filter_context_.IsCachedContent() ?
234                SdchManager::CACHED_META_REFRESH_UNSUPPORTED :
235                SdchManager::META_REFRESH_UNSUPPORTED);
236           url_request_context_->sdch_manager()->BlacklistDomainForever(
237               url_, problem);
238           SdchManager::SdchErrorRecovery(problem);
239           return FILTER_ERROR;
240         }
241         // HTML content means we can issue a meta-refresh, and get the content
242         // again, perhaps without SDCH (to be safe).
243         if (filter_context_.IsCachedContent()) {
244           // Cached content is probably a startup tab, so we'll just get fresh
245           // content and try again, without disabling sdch.
246           SdchManager::SdchErrorRecovery(
247               SdchManager::META_REFRESH_CACHED_RECOVERY);
248         } else {
249           // Since it wasn't in the cache, we definately need at least some
250           // period of blacklisting to get the correct content.
251           url_request_context_->sdch_manager()->BlacklistDomain(
252               url_, SdchManager::META_REFRESH_RECOVERY);
253           SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
254         }
255         decoding_status_ = META_REFRESH_RECOVERY;
256         // Issue a meta redirect with SDCH disabled.
257         dest_buffer_excess_ = kDecompressionErrorHtml;
258       }
259     } else {
260       DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_);
261     }
262   }
263
264   int amount = OutputBufferExcess(dest_buffer, available_space);
265   *dest_len += amount;
266   dest_buffer += amount;
267   available_space -= amount;
268   DCHECK_GE(available_space, 0);
269
270   if (available_space <= 0)
271     return FILTER_OK;
272   DCHECK(dest_buffer_excess_.empty());
273   DCHECK_EQ(0u, dest_buffer_excess_index_);
274
275   if (decoding_status_ != DECODING_IN_PROGRESS) {
276     if (META_REFRESH_RECOVERY == decoding_status_) {
277       // Absorb all input data.  We've already output page reload HTML.
278       next_stream_data_ = NULL;
279       stream_data_len_ = 0;
280       return FILTER_NEED_MORE_DATA;
281     }
282     if (PASS_THROUGH == decoding_status_) {
283       // We must pass in available_space, but it will be changed to bytes_used.
284       FilterStatus result = CopyOut(dest_buffer, &available_space);
285       // Accumulate the returned count of bytes_used (a.k.a., available_space).
286       *dest_len += available_space;
287       return result;
288     }
289     DCHECK(false);
290     decoding_status_ = DECODING_ERROR;
291     return FILTER_ERROR;
292   }
293
294   if (!next_stream_data_ || stream_data_len_ <= 0)
295     return FILTER_NEED_MORE_DATA;
296
297   bool ret = vcdiff_streaming_decoder_->DecodeChunk(
298     next_stream_data_, stream_data_len_, &dest_buffer_excess_);
299   // Assume all data was used in decoding.
300   next_stream_data_ = NULL;
301   source_bytes_ += stream_data_len_;
302   stream_data_len_ = 0;
303   output_bytes_ += dest_buffer_excess_.size();
304   if (!ret) {
305     vcdiff_streaming_decoder_.reset(NULL);  // Don't call it again.
306     decoding_status_ = DECODING_ERROR;
307     SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
308     return FILTER_ERROR;
309   }
310
311   amount = OutputBufferExcess(dest_buffer, available_space);
312   *dest_len += amount;
313   dest_buffer += amount;
314   available_space -= amount;
315   if (0 == available_space && !dest_buffer_excess_.empty())
316       return FILTER_OK;
317   return FILTER_NEED_MORE_DATA;
318 }
319
320 Filter::FilterStatus SdchFilter::InitializeDictionary() {
321   const size_t kServerIdLength = 9;  // Dictionary hash plus null from server.
322   size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
323   DCHECK_GT(bytes_needed, 0u);
324   if (!next_stream_data_)
325     return FILTER_NEED_MORE_DATA;
326   if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
327     dictionary_hash_.append(next_stream_data_, stream_data_len_);
328     next_stream_data_ = NULL;
329     stream_data_len_ = 0;
330     return FILTER_NEED_MORE_DATA;
331   }
332   dictionary_hash_.append(next_stream_data_, bytes_needed);
333   DCHECK(kServerIdLength == dictionary_hash_.size());
334   stream_data_len_ -= bytes_needed;
335   DCHECK_LE(0, stream_data_len_);
336   if (stream_data_len_ > 0)
337     next_stream_data_ += bytes_needed;
338   else
339     next_stream_data_ = NULL;
340
341   DCHECK(!dictionary_);
342   dictionary_hash_is_plausible_ = true;  // Assume plausible, but check.
343
344   if ('\0' == dictionary_hash_[kServerIdLength - 1]) {
345     SdchManager* manager(url_request_context_->sdch_manager());
346     manager->GetVcdiffDictionary(
347         std::string(dictionary_hash_, 0, kServerIdLength - 1),
348         url_, &dictionary_);
349   } else {
350     dictionary_hash_is_plausible_ = false;
351   }
352
353   if (!dictionary_) {
354     DCHECK(dictionary_hash_.size() == kServerIdLength);
355     // Since dictionary was not found, check to see if hash was even plausible.
356     for (size_t i = 0; i < kServerIdLength - 1; ++i) {
357       char base64_char = dictionary_hash_[i];
358       if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
359         dictionary_hash_is_plausible_ = false;
360         break;
361       }
362     }
363     if (dictionary_hash_is_plausible_)
364       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
365     else
366       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
367     decoding_status_ = DECODING_ERROR;
368     return FILTER_ERROR;
369   }
370   vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
371   vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
372   vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
373                                            dictionary_->text().size());
374   decoding_status_ = DECODING_IN_PROGRESS;
375   return FILTER_OK;
376 }
377
378 int SdchFilter::OutputBufferExcess(char* const dest_buffer,
379                                    size_t available_space) {
380   if (dest_buffer_excess_.empty())
381     return 0;
382   DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
383   size_t amount = std::min(available_space,
384       dest_buffer_excess_.size() - dest_buffer_excess_index_);
385   memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
386          amount);
387   dest_buffer_excess_index_ += amount;
388   if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
389     DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
390     dest_buffer_excess_.clear();
391     dest_buffer_excess_index_ = 0;
392   }
393   return amount;
394 }
395
396 }  // namespace net