Upstream version 5.34.98.0
[platform/framework/web/crosswalk.git] / src / net / base / sdch_filter.cc
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/sdch_filter.h"
6
7 #include <limits.h>
8 #include <ctype.h>
9 #include <algorithm>
10
11 #include "base/logging.h"
12 #include "base/metrics/histogram.h"
13 #include "net/base/sdch_manager.h"
14
15 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
16
17 namespace net {
18
19 SdchFilter::SdchFilter(const FilterContext& filter_context)
20     : filter_context_(filter_context),
21       decoding_status_(DECODING_UNINITIALIZED),
22       dictionary_hash_(),
23       dictionary_hash_is_plausible_(false),
24       dictionary_(NULL),
25       dest_buffer_excess_(),
26       dest_buffer_excess_index_(0),
27       source_bytes_(0),
28       output_bytes_(0),
29       possible_pass_through_(false) {
30   bool success = filter_context.GetMimeType(&mime_type_);
31   DCHECK(success);
32   success = filter_context.GetURL(&url_);
33   DCHECK(success);
34 }
35
36 SdchFilter::~SdchFilter() {
37   // All code here is for gathering stats, and can be removed when SDCH is
38   // considered stable.
39
40   static int filter_use_count = 0;
41   ++filter_use_count;
42   if (META_REFRESH_RECOVERY == decoding_status_) {
43     UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
44   }
45
46   if (vcdiff_streaming_decoder_.get()) {
47     if (!vcdiff_streaming_decoder_->FinishDecoding()) {
48       decoding_status_ = DECODING_ERROR;
49       SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
50       // Make it possible for the user to hit reload, and get non-sdch content.
51       // Note this will "wear off" quickly enough, and is just meant to assure
52       // in some rare case that the user is not stuck.
53       SdchManager::BlacklistDomain(url_);
54       UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
55            static_cast<int>(filter_context_.GetByteReadCount()));
56       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
57       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
58     }
59   }
60
61   if (!dest_buffer_excess_.empty()) {
62     // Filter chaining error, or premature teardown.
63     SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
64     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
65          static_cast<int>(filter_context_.GetByteReadCount()));
66     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
67                          dest_buffer_excess_.size());
68     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
69     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
70   }
71
72   if (filter_context_.IsCachedContent()) {
73     // Not a real error, but it is useful to have this tally.
74     // TODO(jar): Remove this stat after SDCH stability is validated.
75     SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
76     return;  // We don't need timing stats, and we aready got ratios.
77   }
78
79   switch (decoding_status_) {
80     case DECODING_IN_PROGRESS: {
81       if (output_bytes_)
82         UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
83             static_cast<int>(
84                 (filter_context_.GetByteReadCount() * 100) / output_bytes_));
85       UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
86                            output_bytes_);
87       filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);
88
89       // Allow latency experiments to proceed.
90       SdchManager::Global()->SetAllowLatencyExperiment(url_, true);
91       return;
92     }
93     case PASS_THROUGH: {
94       filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
95       return;
96     }
97     case DECODING_UNINITIALIZED: {
98       SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
99       return;
100     }
101     case WAITING_FOR_DICTIONARY_SELECTION: {
102       SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
103       return;
104     }
105     case DECODING_ERROR: {
106       SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
107       return;
108     }
109     case META_REFRESH_RECOVERY: {
110       // Already accounted for when set.
111       return;
112     }
113   }  // end of switch.
114 }
115
116 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
117   if (decoding_status_ != DECODING_UNINITIALIZED)
118     return false;
119
120   // Handle case  where sdch filter is guessed, but not required.
121   if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
122     possible_pass_through_ = true;
123
124   // Initialize decoder only after we have a dictionary in hand.
125   decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
126   return true;
127 }
128
// HTML emitted in place of undecodable SDCH content.  The META refresh makes
// the browser reload the page immediately.  Debug builds additionally show a
// visible banner explaining what happened.
//
// Declared as a const array (not a `const char*`) so that the symbol itself
// cannot be re-pointed at run time.
#ifndef NDEBUG
static const char kDecompressionErrorHtml[] =
  "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
  "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
  "border-color:black;border-style:solid;text-align:left;font-family:arial;"
  // The CSS property for text colour is "color"; "foreground-color" does not
  // exist and is silently ignored by browsers.
  "font-size:10pt;color:black;background-color:white\">"
  "An error occurred. This page will be reloaded shortly. "
  "Or press the \"reload\" button now to reload it immediately."
  "</div>";
#else
static const char kDecompressionErrorHtml[] =
  "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
#endif
142
143 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
144                                                   int* dest_len) {
145   int available_space = *dest_len;
146   *dest_len = 0;  // Nothing output yet.
147
148   if (!dest_buffer || available_space <= 0)
149     return FILTER_ERROR;
150
151   if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
152     FilterStatus status = InitializeDictionary();
153     if (FILTER_NEED_MORE_DATA == status)
154       return FILTER_NEED_MORE_DATA;
155     if (FILTER_ERROR == status) {
156       DCHECK_EQ(DECODING_ERROR, decoding_status_);
157       DCHECK_EQ(0u, dest_buffer_excess_index_);
158       DCHECK(dest_buffer_excess_.empty());
159       // This is where we try very hard to do error recovery, and make this
160       // protocol robust in the face of proxies that do many different things.
161       // If we decide that things are looking very bad (too hard to recover),
162       // we may even issue a "meta-refresh" to reload the page without an SDCH
163       // advertisement (so that we are sure we're not hurting anything).
164       //
165       // Watch out for an error page inserted by the proxy as part of a 40x
166       // error response.  When we see such content molestation, we certainly
167       // need to fall into the meta-refresh case.
168       if (filter_context_.GetResponseCode() == 404) {
169         // We could be more generous, but for now, only a "NOT FOUND" code will
170         // cause a pass through.  All other bad codes will fall into a
171         // meta-refresh.
172         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
173         decoding_status_ = PASS_THROUGH;
174       } else if (filter_context_.GetResponseCode() != 200) {
175         // We need to meta-refresh, with SDCH disabled.
176       } else if (filter_context_.IsCachedContent()
177                  && !dictionary_hash_is_plausible_) {
178         // We must have hit the back button, and gotten content that was fetched
179         // before we *really* advertised SDCH and a dictionary.
180         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
181         decoding_status_ = PASS_THROUGH;
182       } else if (possible_pass_through_) {
183         // This is the potentially most graceful response. There really was no
184         // error. We were just overly cautious when we added a TENTATIVE_SDCH.
185         // We added the sdch coding tag, and it should not have been added.
186         // This can happen in server experiments, where the server decides
187         // not to use sdch, even though there is a dictionary.  To be
188         // conservative, we locally added the tentative sdch (fearing that a
189         // proxy stripped it!) and we must now recant (pass through).
190         SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH);
191         // However.... just to be sure we don't get burned by proxies that
192         // re-compress with gzip or other system, we can sniff to see if this
193         // is compressed data etc.  For now, we do nothing, which gets us into
194         // the meta-refresh result.
195         // TODO(jar): Improve robustness by sniffing for valid text that we can
196         // actual use re: decoding_status_ = PASS_THROUGH;
197       } else if (dictionary_hash_is_plausible_) {
198         // We need a meta-refresh since we don't have the dictionary.
199         // The common cause is a restart of the browser, where we try to render
200         // cached content that was saved when we had a dictionary.
201       } else if (filter_context_.IsSdchResponse()) {
202         // This is a very corrupt SDCH request response.  We can't decode it.
203         // We'll use a meta-refresh, and get content without asking for SDCH.
204         // This will also progressively disable SDCH for this domain.
205       } else {
206         // One of the first 9 bytes precluded consideration as a hash.
207         // This can't be an SDCH payload, even though the server said it was.
208         // This is a major error, as the server or proxy tagged this SDCH even
209         // though it is not!
210         // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
211         // Worse yet, meta-refresh could lead to an infinite refresh loop.
212         SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
213         decoding_status_ = PASS_THROUGH;
214         // ... but further back-off on advertising SDCH support.
215         SdchManager::BlacklistDomain(url_);
216       }
217
218       if (decoding_status_ == PASS_THROUGH) {
219         dest_buffer_excess_ = dictionary_hash_;  // Send what we scanned.
220       } else {
221         // This is where we try to do the expensive meta-refresh.
222         if (std::string::npos == mime_type_.find("text/html")) {
223           // Since we can't do a meta-refresh (along with an exponential
224           // backoff), we'll just make sure this NEVER happens again.
225           SdchManager::BlacklistDomainForever(url_);
226           if (filter_context_.IsCachedContent())
227             SdchManager::SdchErrorRecovery(
228                 SdchManager::CACHED_META_REFRESH_UNSUPPORTED);
229           else
230             SdchManager::SdchErrorRecovery(
231                 SdchManager::META_REFRESH_UNSUPPORTED);
232           return FILTER_ERROR;
233         }
234         // HTML content means we can issue a meta-refresh, and get the content
235         // again, perhaps without SDCH (to be safe).
236         if (filter_context_.IsCachedContent()) {
237           // Cached content is probably a startup tab, so we'll just get fresh
238           // content and try again, without disabling sdch.
239           SdchManager::SdchErrorRecovery(
240               SdchManager::META_REFRESH_CACHED_RECOVERY);
241         } else {
242           // Since it wasn't in the cache, we definately need at least some
243           // period of blacklisting to get the correct content.
244           SdchManager::BlacklistDomain(url_);
245           SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
246         }
247         decoding_status_ = META_REFRESH_RECOVERY;
248         // Issue a meta redirect with SDCH disabled.
249         dest_buffer_excess_ = kDecompressionErrorHtml;
250       }
251     } else {
252       DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_);
253     }
254   }
255
256   int amount = OutputBufferExcess(dest_buffer, available_space);
257   *dest_len += amount;
258   dest_buffer += amount;
259   available_space -= amount;
260   DCHECK_GE(available_space, 0);
261
262   if (available_space <= 0)
263     return FILTER_OK;
264   DCHECK(dest_buffer_excess_.empty());
265   DCHECK_EQ(0u, dest_buffer_excess_index_);
266
267   if (decoding_status_ != DECODING_IN_PROGRESS) {
268     if (META_REFRESH_RECOVERY == decoding_status_) {
269       // Absorb all input data.  We've already output page reload HTML.
270       next_stream_data_ = NULL;
271       stream_data_len_ = 0;
272       return FILTER_NEED_MORE_DATA;
273     }
274     if (PASS_THROUGH == decoding_status_) {
275       // We must pass in available_space, but it will be changed to bytes_used.
276       FilterStatus result = CopyOut(dest_buffer, &available_space);
277       // Accumulate the returned count of bytes_used (a.k.a., available_space).
278       *dest_len += available_space;
279       return result;
280     }
281     DCHECK(false);
282     decoding_status_ = DECODING_ERROR;
283     return FILTER_ERROR;
284   }
285
286   if (!next_stream_data_ || stream_data_len_ <= 0)
287     return FILTER_NEED_MORE_DATA;
288
289   bool ret = vcdiff_streaming_decoder_->DecodeChunk(
290     next_stream_data_, stream_data_len_, &dest_buffer_excess_);
291   // Assume all data was used in decoding.
292   next_stream_data_ = NULL;
293   source_bytes_ += stream_data_len_;
294   stream_data_len_ = 0;
295   output_bytes_ += dest_buffer_excess_.size();
296   if (!ret) {
297     vcdiff_streaming_decoder_.reset(NULL);  // Don't call it again.
298     decoding_status_ = DECODING_ERROR;
299     SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
300     return FILTER_ERROR;
301   }
302
303   amount = OutputBufferExcess(dest_buffer, available_space);
304   *dest_len += amount;
305   dest_buffer += amount;
306   available_space -= amount;
307   if (0 == available_space && !dest_buffer_excess_.empty())
308       return FILTER_OK;
309   return FILTER_NEED_MORE_DATA;
310 }
311
312 Filter::FilterStatus SdchFilter::InitializeDictionary() {
313   const size_t kServerIdLength = 9;  // Dictionary hash plus null from server.
314   size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
315   DCHECK_GT(bytes_needed, 0u);
316   if (!next_stream_data_)
317     return FILTER_NEED_MORE_DATA;
318   if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
319     dictionary_hash_.append(next_stream_data_, stream_data_len_);
320     next_stream_data_ = NULL;
321     stream_data_len_ = 0;
322     return FILTER_NEED_MORE_DATA;
323   }
324   dictionary_hash_.append(next_stream_data_, bytes_needed);
325   DCHECK(kServerIdLength == dictionary_hash_.size());
326   stream_data_len_ -= bytes_needed;
327   DCHECK_LE(0, stream_data_len_);
328   if (stream_data_len_ > 0)
329     next_stream_data_ += bytes_needed;
330   else
331     next_stream_data_ = NULL;
332
333   DCHECK(!dictionary_.get());
334   dictionary_hash_is_plausible_ = true;  // Assume plausible, but check.
335
336   SdchManager::Dictionary* dictionary = NULL;
337   if ('\0' == dictionary_hash_[kServerIdLength - 1])
338     SdchManager::Global()->GetVcdiffDictionary(std::string(dictionary_hash_, 0,
339                                                            kServerIdLength - 1),
340                                                url_, &dictionary);
341   else
342     dictionary_hash_is_plausible_ = false;
343
344   if (!dictionary) {
345     DCHECK(dictionary_hash_.size() == kServerIdLength);
346     // Since dictionary was not found, check to see if hash was even plausible.
347     for (size_t i = 0; i < kServerIdLength - 1; ++i) {
348       char base64_char = dictionary_hash_[i];
349       if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
350         dictionary_hash_is_plausible_ = false;
351         break;
352       }
353     }
354     if (dictionary_hash_is_plausible_)
355       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
356     else
357       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
358     decoding_status_ = DECODING_ERROR;
359     return FILTER_ERROR;
360   }
361   dictionary_ = dictionary;
362   vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
363   vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
364   vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
365                                            dictionary_->text().size());
366   decoding_status_ = DECODING_IN_PROGRESS;
367   return FILTER_OK;
368 }
369
370 int SdchFilter::OutputBufferExcess(char* const dest_buffer,
371                                    size_t available_space) {
372   if (dest_buffer_excess_.empty())
373     return 0;
374   DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
375   size_t amount = std::min(available_space,
376       dest_buffer_excess_.size() - dest_buffer_excess_index_);
377   memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
378          amount);
379   dest_buffer_excess_index_ += amount;
380   if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
381     DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
382     dest_buffer_excess_.clear();
383     dest_buffer_excess_index_ = 0;
384   }
385   return amount;
386 }
387
388 }  // namespace net