[M108 Migration][HBBTV] Implement ewk_context_register_jsplugin_mime_types API
[platform/framework/web/chromium-efl.git] / courgette / ensemble_create.cc
1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // The main idea in Courgette is to do patching *under a tranformation*.  The
6 // input is transformed into a new representation, patching occurs in the new
7 // repesentation, and then the tranform is reversed to get the patched data.
8 //
9 // The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
10 // Each of the elements has to go through the same set of steps in lock-step.
11
12 // This file contains the code to create the patch.
13
14
15 #include "courgette/ensemble.h"
16
17 #include <stddef.h>
18
19 #include <limits>
20 #include <vector>
21
22 #include "base/logging.h"
23 #include "base/time/time.h"
24
25 #include "courgette/crc.h"
26 #include "courgette/difference_estimator.h"
27 #include "courgette/patch_generator_x86_32.h"
28 #include "courgette/patcher_x86_32.h"
29 #include "courgette/region.h"
30 #include "courgette/simple_delta.h"
31 #include "courgette/streams.h"
32 #include "courgette/third_party/bsdiff/bsdiff.h"
33
34 namespace courgette {
35
36 TransformationPatchGenerator::TransformationPatchGenerator(
37     Element* old_element,
38     Element* new_element,
39     TransformationPatcher* patcher)
40     : old_element_(old_element),
41       new_element_(new_element),
42       patcher_(patcher) {
43 }
44
45 TransformationPatchGenerator::~TransformationPatchGenerator() {
46   delete patcher_;
47 }
48
49 // The default implementation of PredictTransformParameters delegates to the
50 // patcher.
51 Status TransformationPatchGenerator::PredictTransformParameters(
52     SinkStreamSet* prediction) {
53   return patcher_->PredictTransformParameters(prediction);
54 }
55
56 // The default implementation of Reform delegates to the patcher.
57 Status TransformationPatchGenerator::Reform(
58     SourceStreamSet* transformed_element,
59     SinkStream* reformed_element) {
60   return patcher_->Reform(transformed_element, reformed_element);
61 }
62
63 // Makes a TransformationPatchGenerator of the appropriate variety for the
64 // Element kind.
65 TransformationPatchGenerator* MakeGenerator(Element* old_element,
66                                             Element* new_element) {
67   switch (new_element->kind()) {
68     case EXE_UNKNOWN:
69       break;
70     case EXE_WIN_32_X86: {
71       TransformationPatchGenerator* generator =
72           new PatchGeneratorX86_32(
73               old_element,
74               new_element,
75               new PatcherX86_32(old_element->region()),
76               EXE_WIN_32_X86);
77       return generator;
78     }
79     case EXE_ELF_32_X86: {
80       TransformationPatchGenerator* generator =
81           new PatchGeneratorX86_32(
82               old_element,
83               new_element,
84               new PatcherX86_32(old_element->region()),
85               EXE_ELF_32_X86);
86       return generator;
87     }
88     case EXE_WIN_32_X64: {
89       TransformationPatchGenerator* generator =
90           new PatchGeneratorX86_32(
91               old_element,
92               new_element,
93               new PatcherX86_32(old_element->region()),
94               EXE_WIN_32_X64);
95       return generator;
96     }
97   }
98
99   LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind();
100   return nullptr;
101 }
102
103 // Checks to see if the proposed comparison is 'unsafe'.  Sometimes one element
104 // from 'old' is matched as the closest element to multiple elements from 'new'.
105 // Each time this happens, the old element is transformed and serialized.  This
106 // is a problem when the old element is huge compared with the new element
107 // because the mutliple serialized copies can be much bigger than the size of
108 // either ensemble.
109 //
110 // The right way to avoid this is to ensure any one element from 'old' is
111 // serialized once, which requires matching code in the patch application.
112 //
113 // This is a quick hack to avoid the problem by prohibiting a big difference in
114 // size between matching elements.
115 bool UnsafeDifference(Element* old_element, Element* new_element) {
116   double kMaxBloat = 2.0;
117   size_t kMinWorrysomeDifference = 2 << 20;  // 2MB
118   size_t old_size = old_element->region().length();
119   size_t new_size = new_element->region().length();
120   size_t low_size = std::min(old_size, new_size);
121   size_t high_size = std::max(old_size, new_size);
122   if (high_size - low_size < kMinWorrysomeDifference) return false;
123   if (high_size < low_size * kMaxBloat) return false;
124   return true;
125 }
126
127 // FindGenerators finds TransformationPatchGenerators for the elements of
128 // |new_ensemble|.  For each element of |new_ensemble| we find the closest
129 // matching element from |old_ensemble| and use that as the basis for
130 // differential compression.  The elements have to be the same kind so as to
131 // support transformation into the same kind of 'new representation'.
132 //
133 Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble,
134                       std::vector<TransformationPatchGenerator*>* generators) {
135   base::Time start_find_time = base::Time::Now();
136   old_ensemble->FindEmbeddedElements();
137   new_ensemble->FindEmbeddedElements();
138   VLOG(1) << "done FindEmbeddedElements "
139           << (base::Time::Now() - start_find_time).InSecondsF();
140
141   std::vector<Element*> old_elements(old_ensemble->elements());
142   std::vector<Element*> new_elements(new_ensemble->elements());
143
144   VLOG(1) << "old has " << old_elements.size() << " elements";
145   VLOG(1) << "new has " << new_elements.size() << " elements";
146
147   DifferenceEstimator difference_estimator;
148   std::vector<DifferenceEstimator::Base*> bases;
149
150   base::Time start_bases_time = base::Time::Now();
151   for (size_t i = 0;  i < old_elements.size();  ++i) {
152     bases.push_back(
153         difference_estimator.MakeBase(old_elements[i]->region()));
154   }
155   VLOG(1) << "done make bases "
156           << (base::Time::Now() - start_bases_time).InSecondsF() << "s";
157
158   for (size_t new_index = 0;  new_index < new_elements.size();  ++new_index) {
159     Element* new_element = new_elements[new_index];
160     DifferenceEstimator::Subject* new_subject =
161         difference_estimator.MakeSubject(new_element->region());
162
163     // Search through old elements to find the best match.
164     //
165     // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
166     // new_ensemble probably have a very similar structure.  We can make the
167     // search faster by making the comparison provided by DifferenceEstimator
168     // more nuanced, returning early if the measured difference is greater than
169     // the current best.  This will be most effective if we can arrange that the
170     // first elements we try to match are likely the 'right' ones.  We could
171     // prioritize elements that are of a similar size or similar position in the
172     // sequence of elements.
173     //
174     Element* best_old_element = nullptr;
175     size_t best_difference = std::numeric_limits<size_t>::max();
176     for (size_t old_index = 0;  old_index < old_elements.size();  ++old_index) {
177       Element* old_element = old_elements[old_index];
178       // Elements of different kinds are incompatible.
179       if (old_element->kind() != new_element->kind())
180         continue;
181
182       if (UnsafeDifference(old_element, new_element))
183         continue;
184
185       base::Time start_compare = base::Time::Now();
186       DifferenceEstimator::Base* old_base = bases[old_index];
187       size_t difference = difference_estimator.Measure(old_base, new_subject);
188
189       VLOG(1) << "Compare " << old_element->Name()
190               << " to " << new_element->Name()
191               << " --> " << difference
192               << " in " << (base::Time::Now() - start_compare).InSecondsF()
193               << "s";
194       if (difference == 0) {
195         VLOG(1) << "Skip " << new_element->Name()
196                 << " - identical to " << old_element->Name();
197         best_difference = 0;
198         best_old_element = nullptr;
199         break;
200       }
201       if (difference < best_difference) {
202         best_difference = difference;
203         best_old_element = old_element;
204       }
205     }
206
207     if (best_old_element) {
208       VLOG(1) << "Matched " << best_old_element->Name()
209               << " to " << new_element->Name()
210               << " --> " << best_difference;
211       TransformationPatchGenerator* generator =
212           MakeGenerator(best_old_element, new_element);
213       if (generator)
214         generators->push_back(generator);
215     }
216   }
217
218   VLOG(1) << "done FindGenerators found " << generators->size()
219           << " in " << (base::Time::Now() - start_find_time).InSecondsF()
220           << "s";
221
222   return C_OK;
223 }
224
225 void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) {
226   for (size_t i = 0;  i < generators->size();  ++i) {
227     delete (*generators)[i];
228   }
229   generators->clear();
230 }
231
232 ////////////////////////////////////////////////////////////////////////////////
233
234 Status GenerateEnsemblePatch(SourceStream* base,
235                              SourceStream* update,
236                              SinkStream* final_patch) {
237   VLOG(1) << "start GenerateEnsemblePatch";
238   base::Time start_time = base::Time::Now();
239
240   Region old_region(base->Buffer(), base->Remaining());
241   Region new_region(update->Buffer(), update->Remaining());
242   Ensemble old_ensemble(old_region, "old");
243   Ensemble new_ensemble(new_region, "new");
244   std::vector<TransformationPatchGenerator*> generators;
245   Status generators_status = FindGenerators(&old_ensemble, &new_ensemble,
246                                             &generators);
247   if (generators_status != C_OK)
248     return generators_status;
249
250   SinkStreamSet patch_streams;
251
252   SinkStream* tranformation_descriptions      = patch_streams.stream(0);
253   SinkStream* parameter_correction            = patch_streams.stream(1);
254   SinkStream* transformed_elements_correction = patch_streams.stream(2);
255   SinkStream* ensemble_correction             = patch_streams.stream(3);
256
257   size_t number_of_transformations = generators.size();
258   if (!tranformation_descriptions->WriteSizeVarint32(number_of_transformations))
259     return C_STREAM_ERROR;
260
261   for (size_t i = 0;  i < number_of_transformations;  ++i) {
262     ExecutableType kind = generators[i]->Kind();
263     if (!tranformation_descriptions->WriteVarint32(kind))
264       return C_STREAM_ERROR;
265   }
266
267   for (size_t i = 0;  i < number_of_transformations;  ++i) {
268     Status status =
269         generators[i]->WriteInitialParameters(tranformation_descriptions);
270     if (status != C_OK)
271       return status;
272   }
273
274   //
275   // Generate sub-patch for parameters.
276   //
277   SinkStreamSet predicted_parameters_sink;
278   SinkStreamSet corrected_parameters_sink;
279
280   for (size_t i = 0;  i < number_of_transformations;  ++i) {
281     SinkStreamSet single_predicted_parameters;
282     Status status;
283     status = generators[i]->PredictTransformParameters(
284         &single_predicted_parameters);
285     if (status != C_OK)
286       return status;
287     if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters))
288       return C_STREAM_ERROR;
289
290     SinkStreamSet single_corrected_parameters;
291     status = generators[i]->CorrectedTransformParameters(
292         &single_corrected_parameters);
293     if (status != C_OK)
294       return status;
295     if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters))
296       return C_STREAM_ERROR;
297   }
298
299   SinkStream linearized_predicted_parameters;
300   SinkStream linearized_corrected_parameters;
301
302   if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters))
303     return C_STREAM_ERROR;
304   if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters))
305     return C_STREAM_ERROR;
306
307   SourceStream predicted_parameters_source;
308   SourceStream corrected_parameters_source;
309   predicted_parameters_source.Init(linearized_predicted_parameters);
310   corrected_parameters_source.Init(linearized_corrected_parameters);
311
312   Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source,
313                                              &corrected_parameters_source,
314                                              parameter_correction);
315   if (delta1_status != C_OK)
316     return delta1_status;
317
318   //
319   // Generate sub-patch for elements.
320   //
321   corrected_parameters_source.Init(linearized_corrected_parameters);
322   SourceStreamSet corrected_parameters_source_set;
323   if (!corrected_parameters_source_set.Init(&corrected_parameters_source))
324     return C_STREAM_ERROR;
325
326   SinkStreamSet predicted_transformed_elements;
327   SinkStreamSet corrected_transformed_elements;
328
329   for (size_t i = 0;  i < number_of_transformations;  ++i) {
330     SourceStreamSet single_parameters;
331     if (!corrected_parameters_source_set.ReadSet(&single_parameters))
332       return C_STREAM_ERROR;
333     SinkStreamSet single_predicted_transformed_element;
334     SinkStreamSet single_corrected_transformed_element;
335     Status status = generators[i]->Transform(
336         &single_parameters,
337         &single_predicted_transformed_element,
338         &single_corrected_transformed_element);
339     if (status != C_OK)
340       return status;
341     if (!single_parameters.Empty())
342       return C_STREAM_NOT_CONSUMED;
343     if (!predicted_transformed_elements.WriteSet(
344             &single_predicted_transformed_element))
345       return C_STREAM_ERROR;
346     if (!corrected_transformed_elements.WriteSet(
347             &single_corrected_transformed_element))
348       return C_STREAM_ERROR;
349   }
350
351   if (!corrected_parameters_source_set.Empty())
352     return C_STREAM_NOT_CONSUMED;
353
354   SinkStream linearized_predicted_transformed_elements;
355   SinkStream linearized_corrected_transformed_elements;
356
357   if (!predicted_transformed_elements.CopyTo(
358           &linearized_predicted_transformed_elements))
359     return C_STREAM_ERROR;
360   if (!corrected_transformed_elements.CopyTo(
361           &linearized_corrected_transformed_elements))
362     return C_STREAM_ERROR;
363
364   SourceStream predicted_transformed_elements_source;
365   SourceStream corrected_transformed_elements_source;
366   predicted_transformed_elements_source
367       .Init(linearized_predicted_transformed_elements);
368   corrected_transformed_elements_source
369       .Init(linearized_corrected_transformed_elements);
370
371   Status delta2_status =
372       GenerateSimpleDelta(&predicted_transformed_elements_source,
373                           &corrected_transformed_elements_source,
374                           transformed_elements_correction);
375   if (delta2_status != C_OK)
376     return delta2_status;
377
378   // Last use, free storage.
379   linearized_predicted_transformed_elements.Retire();
380
381   //
382   // Generate sub-patch for whole enchilada.
383   //
384   SinkStream predicted_ensemble;
385
386   if (!predicted_ensemble.Write(base->Buffer(), base->Remaining()))
387     return C_STREAM_ERROR;
388
389   SourceStreamSet corrected_transformed_elements_source_set;
390   corrected_transformed_elements_source
391       .Init(linearized_corrected_transformed_elements);
392   if (!corrected_transformed_elements_source_set
393       .Init(&corrected_transformed_elements_source))
394     return C_STREAM_ERROR;
395
396   for (size_t i = 0;  i < number_of_transformations;  ++i) {
397     SourceStreamSet single_corrected_transformed_element;
398     if (!corrected_transformed_elements_source_set.ReadSet(
399             &single_corrected_transformed_element))
400       return C_STREAM_ERROR;
401     Status status = generators[i]->Reform(&single_corrected_transformed_element,
402                                           &predicted_ensemble);
403     if (status != C_OK)
404       return status;
405     if (!single_corrected_transformed_element.Empty())
406       return C_STREAM_NOT_CONSUMED;
407   }
408
409   if (!corrected_transformed_elements_source_set.Empty())
410     return C_STREAM_NOT_CONSUMED;
411
412   // No more references to this stream's buffer.
413   linearized_corrected_transformed_elements.Retire();
414
415   FreeGenerators(&generators);
416
417   size_t final_patch_input_size = predicted_ensemble.Length();
418   SourceStream predicted_ensemble_source;
419   predicted_ensemble_source.Init(predicted_ensemble);
420   Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
421                                              update,
422                                              ensemble_correction);
423   if (delta3_status != C_OK)
424     return delta3_status;
425
426   //
427   // Final output stream has a header followed by a StreamSet.
428   //
429   if (!final_patch->WriteVarint32(CourgettePatchFile::kMagic) ||
430       !final_patch->WriteVarint32(CourgettePatchFile::kVersion) ||
431       !final_patch->WriteVarint32(CalculateCrc(old_region.start(),
432                                                old_region.length())) ||
433       !final_patch->WriteVarint32(CalculateCrc(new_region.start(),
434                                                new_region.length())) ||
435       !final_patch->WriteSizeVarint32(final_patch_input_size) ||
436       !patch_streams.CopyTo(final_patch)) {
437     return C_STREAM_ERROR;
438   }
439
440   VLOG(1) << "done GenerateEnsemblePatch "
441           << (base::Time::Now() - start_time).InSecondsF() << "s";
442
443   return C_OK;
444 }
445
446 }  // namespace