Upstream version 5.34.104.0
[platform/framework/web/crosswalk.git] / src / net / filter / sdch_filter.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/filter/sdch_filter.h"
6
7 #include <ctype.h>
8 #include <limits.h>
9
10 #include <algorithm>
11
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "net/base/sdch_manager.h"
15
16 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
17
18 namespace net {
19
20 SdchFilter::SdchFilter(const FilterContext& filter_context)
21     : filter_context_(filter_context),
22       decoding_status_(DECODING_UNINITIALIZED),
23       dictionary_hash_(),
24       dictionary_hash_is_plausible_(false),
25       dictionary_(NULL),
26       dest_buffer_excess_(),
27       dest_buffer_excess_index_(0),
28       source_bytes_(0),
29       output_bytes_(0),
30       possible_pass_through_(false) {
31   bool success = filter_context.GetMimeType(&mime_type_);
32   DCHECK(success);
33   success = filter_context.GetURL(&url_);
34   DCHECK(success);
35 }
36
37 SdchFilter::~SdchFilter() {
38   // All code here is for gathering stats, and can be removed when SDCH is
39   // considered stable.
40
41   static int filter_use_count = 0;
42   ++filter_use_count;
43   if (META_REFRESH_RECOVERY == decoding_status_) {
44     UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
45   }
46
47   if (vcdiff_streaming_decoder_.get()) {
48     if (!vcdiff_streaming_decoder_->FinishDecoding()) {
49       decoding_status_ = DECODING_ERROR;
50       SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
51       // Make it possible for the user to hit reload, and get non-sdch content.
52       // Note this will "wear off" quickly enough, and is just meant to assure
53       // in some rare case that the user is not stuck.
54       SdchManager::BlacklistDomain(url_);
55       UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
56            static_cast<int>(filter_context_.GetByteReadCount()));
57       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
58       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
59     }
60   }
61
62   if (!dest_buffer_excess_.empty()) {
63     // Filter chaining error, or premature teardown.
64     SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
65     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
66          static_cast<int>(filter_context_.GetByteReadCount()));
67     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
68                          dest_buffer_excess_.size());
69     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
70     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
71   }
72
73   if (filter_context_.IsCachedContent()) {
74     // Not a real error, but it is useful to have this tally.
75     // TODO(jar): Remove this stat after SDCH stability is validated.
76     SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
77     return;  // We don't need timing stats, and we aready got ratios.
78   }
79
80   switch (decoding_status_) {
81     case DECODING_IN_PROGRESS: {
82       if (output_bytes_)
83         UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
84             static_cast<int>(
85                 (filter_context_.GetByteReadCount() * 100) / output_bytes_));
86       UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
87                            output_bytes_);
88       filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);
89
90       // Allow latency experiments to proceed.
91       SdchManager::Global()->SetAllowLatencyExperiment(url_, true);
92       return;
93     }
94     case PASS_THROUGH: {
95       filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
96       return;
97     }
98     case DECODING_UNINITIALIZED: {
99       SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
100       return;
101     }
102     case WAITING_FOR_DICTIONARY_SELECTION: {
103       SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
104       return;
105     }
106     case DECODING_ERROR: {
107       SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
108       return;
109     }
110     case META_REFRESH_RECOVERY: {
111       // Already accounted for when set.
112       return;
113     }
114   }  // end of switch.
115 }
116
117 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
118   if (decoding_status_ != DECODING_UNINITIALIZED)
119     return false;
120
121   // Handle case  where sdch filter is guessed, but not required.
122   if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
123     possible_pass_through_ = true;
124
125   // Initialize decoder only after we have a dictionary in hand.
126   decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
127   return true;
128 }
129
// HTML emitted in place of the response body when SDCH content cannot be
// decoded: a meta-refresh that reloads the page immediately.  Builds without
// NDEBUG additionally show a visible banner explaining the reload.
//
// Declared as a const array rather than a `const char*`, so that the constant
// is not a reassignable pointer.
#ifndef NDEBUG
static const char kDecompressionErrorHtml[] =
  "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
  "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
  "border-color:black;border-style:solid;text-align:left;font-family:arial;"
  "font-size:10pt;foreground-color:black;background-color:white\">"
  "An error occurred. This page will be reloaded shortly. "
  "Or press the \"reload\" button now to reload it immediately."
  "</div>";
#else
static const char kDecompressionErrorHtml[] =
  "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
#endif
143
144 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
145                                                   int* dest_len) {
146   int available_space = *dest_len;
147   *dest_len = 0;  // Nothing output yet.
148
149   if (!dest_buffer || available_space <= 0)
150     return FILTER_ERROR;
151
152   if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
153     FilterStatus status = InitializeDictionary();
154     if (FILTER_NEED_MORE_DATA == status)
155       return FILTER_NEED_MORE_DATA;
156     if (FILTER_ERROR == status) {
157       DCHECK_EQ(DECODING_ERROR, decoding_status_);
158       DCHECK_EQ(0u, dest_buffer_excess_index_);
159       DCHECK(dest_buffer_excess_.empty());
160       // This is where we try very hard to do error recovery, and make this
161       // protocol robust in the face of proxies that do many different things.
162       // If we decide that things are looking very bad (too hard to recover),
163       // we may even issue a "meta-refresh" to reload the page without an SDCH
164       // advertisement (so that we are sure we're not hurting anything).
165       //
166       // Watch out for an error page inserted by the proxy as part of a 40x
167       // error response.  When we see such content molestation, we certainly
168       // need to fall into the meta-refresh case.
169       if (filter_context_.GetResponseCode() == 404) {
170         // We could be more generous, but for now, only a "NOT FOUND" code will
171         // cause a pass through.  All other bad codes will fall into a
172         // meta-refresh.
173         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
174         decoding_status_ = PASS_THROUGH;
175       } else if (filter_context_.GetResponseCode() != 200) {
176         // We need to meta-refresh, with SDCH disabled.
177       } else if (filter_context_.IsCachedContent()
178                  && !dictionary_hash_is_plausible_) {
179         // We must have hit the back button, and gotten content that was fetched
180         // before we *really* advertised SDCH and a dictionary.
181         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
182         decoding_status_ = PASS_THROUGH;
183       } else if (possible_pass_through_) {
184         // This is the potentially most graceful response. There really was no
185         // error. We were just overly cautious when we added a TENTATIVE_SDCH.
186         // We added the sdch coding tag, and it should not have been added.
187         // This can happen in server experiments, where the server decides
188         // not to use sdch, even though there is a dictionary.  To be
189         // conservative, we locally added the tentative sdch (fearing that a
190         // proxy stripped it!) and we must now recant (pass through).
191         SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH);
192         // However.... just to be sure we don't get burned by proxies that
193         // re-compress with gzip or other system, we can sniff to see if this
194         // is compressed data etc.  For now, we do nothing, which gets us into
195         // the meta-refresh result.
196         // TODO(jar): Improve robustness by sniffing for valid text that we can
197         // actual use re: decoding_status_ = PASS_THROUGH;
198       } else if (dictionary_hash_is_plausible_) {
199         // We need a meta-refresh since we don't have the dictionary.
200         // The common cause is a restart of the browser, where we try to render
201         // cached content that was saved when we had a dictionary.
202       } else if (filter_context_.IsSdchResponse()) {
203         // This is a very corrupt SDCH request response.  We can't decode it.
204         // We'll use a meta-refresh, and get content without asking for SDCH.
205         // This will also progressively disable SDCH for this domain.
206       } else {
207         // One of the first 9 bytes precluded consideration as a hash.
208         // This can't be an SDCH payload, even though the server said it was.
209         // This is a major error, as the server or proxy tagged this SDCH even
210         // though it is not!
211         // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
212         // Worse yet, meta-refresh could lead to an infinite refresh loop.
213         SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
214         decoding_status_ = PASS_THROUGH;
215         // ... but further back-off on advertising SDCH support.
216         SdchManager::BlacklistDomain(url_);
217       }
218
219       if (decoding_status_ == PASS_THROUGH) {
220         dest_buffer_excess_ = dictionary_hash_;  // Send what we scanned.
221       } else {
222         // This is where we try to do the expensive meta-refresh.
223         if (std::string::npos == mime_type_.find("text/html")) {
224           // Since we can't do a meta-refresh (along with an exponential
225           // backoff), we'll just make sure this NEVER happens again.
226           SdchManager::BlacklistDomainForever(url_);
227           if (filter_context_.IsCachedContent())
228             SdchManager::SdchErrorRecovery(
229                 SdchManager::CACHED_META_REFRESH_UNSUPPORTED);
230           else
231             SdchManager::SdchErrorRecovery(
232                 SdchManager::META_REFRESH_UNSUPPORTED);
233           return FILTER_ERROR;
234         }
235         // HTML content means we can issue a meta-refresh, and get the content
236         // again, perhaps without SDCH (to be safe).
237         if (filter_context_.IsCachedContent()) {
238           // Cached content is probably a startup tab, so we'll just get fresh
239           // content and try again, without disabling sdch.
240           SdchManager::SdchErrorRecovery(
241               SdchManager::META_REFRESH_CACHED_RECOVERY);
242         } else {
243           // Since it wasn't in the cache, we definately need at least some
244           // period of blacklisting to get the correct content.
245           SdchManager::BlacklistDomain(url_);
246           SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
247         }
248         decoding_status_ = META_REFRESH_RECOVERY;
249         // Issue a meta redirect with SDCH disabled.
250         dest_buffer_excess_ = kDecompressionErrorHtml;
251       }
252     } else {
253       DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_);
254     }
255   }
256
257   int amount = OutputBufferExcess(dest_buffer, available_space);
258   *dest_len += amount;
259   dest_buffer += amount;
260   available_space -= amount;
261   DCHECK_GE(available_space, 0);
262
263   if (available_space <= 0)
264     return FILTER_OK;
265   DCHECK(dest_buffer_excess_.empty());
266   DCHECK_EQ(0u, dest_buffer_excess_index_);
267
268   if (decoding_status_ != DECODING_IN_PROGRESS) {
269     if (META_REFRESH_RECOVERY == decoding_status_) {
270       // Absorb all input data.  We've already output page reload HTML.
271       next_stream_data_ = NULL;
272       stream_data_len_ = 0;
273       return FILTER_NEED_MORE_DATA;
274     }
275     if (PASS_THROUGH == decoding_status_) {
276       // We must pass in available_space, but it will be changed to bytes_used.
277       FilterStatus result = CopyOut(dest_buffer, &available_space);
278       // Accumulate the returned count of bytes_used (a.k.a., available_space).
279       *dest_len += available_space;
280       return result;
281     }
282     DCHECK(false);
283     decoding_status_ = DECODING_ERROR;
284     return FILTER_ERROR;
285   }
286
287   if (!next_stream_data_ || stream_data_len_ <= 0)
288     return FILTER_NEED_MORE_DATA;
289
290   bool ret = vcdiff_streaming_decoder_->DecodeChunk(
291     next_stream_data_, stream_data_len_, &dest_buffer_excess_);
292   // Assume all data was used in decoding.
293   next_stream_data_ = NULL;
294   source_bytes_ += stream_data_len_;
295   stream_data_len_ = 0;
296   output_bytes_ += dest_buffer_excess_.size();
297   if (!ret) {
298     vcdiff_streaming_decoder_.reset(NULL);  // Don't call it again.
299     decoding_status_ = DECODING_ERROR;
300     SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
301     return FILTER_ERROR;
302   }
303
304   amount = OutputBufferExcess(dest_buffer, available_space);
305   *dest_len += amount;
306   dest_buffer += amount;
307   available_space -= amount;
308   if (0 == available_space && !dest_buffer_excess_.empty())
309       return FILTER_OK;
310   return FILTER_NEED_MORE_DATA;
311 }
312
313 Filter::FilterStatus SdchFilter::InitializeDictionary() {
314   const size_t kServerIdLength = 9;  // Dictionary hash plus null from server.
315   size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
316   DCHECK_GT(bytes_needed, 0u);
317   if (!next_stream_data_)
318     return FILTER_NEED_MORE_DATA;
319   if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
320     dictionary_hash_.append(next_stream_data_, stream_data_len_);
321     next_stream_data_ = NULL;
322     stream_data_len_ = 0;
323     return FILTER_NEED_MORE_DATA;
324   }
325   dictionary_hash_.append(next_stream_data_, bytes_needed);
326   DCHECK(kServerIdLength == dictionary_hash_.size());
327   stream_data_len_ -= bytes_needed;
328   DCHECK_LE(0, stream_data_len_);
329   if (stream_data_len_ > 0)
330     next_stream_data_ += bytes_needed;
331   else
332     next_stream_data_ = NULL;
333
334   DCHECK(!dictionary_.get());
335   dictionary_hash_is_plausible_ = true;  // Assume plausible, but check.
336
337   SdchManager::Dictionary* dictionary = NULL;
338   if ('\0' == dictionary_hash_[kServerIdLength - 1])
339     SdchManager::Global()->GetVcdiffDictionary(std::string(dictionary_hash_, 0,
340                                                            kServerIdLength - 1),
341                                                url_, &dictionary);
342   else
343     dictionary_hash_is_plausible_ = false;
344
345   if (!dictionary) {
346     DCHECK(dictionary_hash_.size() == kServerIdLength);
347     // Since dictionary was not found, check to see if hash was even plausible.
348     for (size_t i = 0; i < kServerIdLength - 1; ++i) {
349       char base64_char = dictionary_hash_[i];
350       if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
351         dictionary_hash_is_plausible_ = false;
352         break;
353       }
354     }
355     if (dictionary_hash_is_plausible_)
356       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
357     else
358       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
359     decoding_status_ = DECODING_ERROR;
360     return FILTER_ERROR;
361   }
362   dictionary_ = dictionary;
363   vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
364   vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
365   vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
366                                            dictionary_->text().size());
367   decoding_status_ = DECODING_IN_PROGRESS;
368   return FILTER_OK;
369 }
370
371 int SdchFilter::OutputBufferExcess(char* const dest_buffer,
372                                    size_t available_space) {
373   if (dest_buffer_excess_.empty())
374     return 0;
375   DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
376   size_t amount = std::min(available_space,
377       dest_buffer_excess_.size() - dest_buffer_excess_index_);
378   memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
379          amount);
380   dest_buffer_excess_index_ += amount;
381   if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
382     DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
383     dest_buffer_excess_.clear();
384     dest_buffer_excess_index_ = 0;
385   }
386   return amount;
387 }
388
389 }  // namespace net