- add sources.
[platform/framework/web/crosswalk.git] / src / courgette / ensemble_create.cc
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // The main idea in Courgette is to do patching *under a tranformation*.  The
6 // input is transformed into a new representation, patching occurs in the new
7 // repesentation, and then the tranform is reversed to get the patched data.
8 //
9 // The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
10 // Each of the elements has to go through the same set of steps in lock-step.
11
12 // This file contains the code to create the patch.
13
14
15 #include "courgette/ensemble.h"
16
17 #include <limits>
18 #include <vector>
19
20 #include "base/basictypes.h"
21 #include "base/logging.h"
22 #include "base/time/time.h"
23
24 #include "courgette/crc.h"
25 #include "courgette/difference_estimator.h"
26 #include "courgette/region.h"
27 #include "courgette/simple_delta.h"
28 #include "courgette/streams.h"
29 #include "courgette/third_party/bsdiff.h"
30
31 #include "courgette/patcher_x86_32.h"
32 #include "courgette/patch_generator_x86_32.h"
33
34 namespace courgette {
35
36 TransformationPatchGenerator::TransformationPatchGenerator(
37     Element* old_element,
38     Element* new_element,
39     TransformationPatcher* patcher)
40     : old_element_(old_element),
41       new_element_(new_element),
42       patcher_(patcher) {
43 }
44
45 TransformationPatchGenerator::~TransformationPatchGenerator() {
46   delete patcher_;
47 }
48
49 // The default implementation of PredictTransformParameters delegates to the
50 // patcher.
51 Status TransformationPatchGenerator::PredictTransformParameters(
52     SinkStreamSet* prediction) {
53   return patcher_->PredictTransformParameters(prediction);
54 }
55
56 // The default implementation of Reform delegates to the patcher.
57 Status TransformationPatchGenerator::Reform(
58     SourceStreamSet* transformed_element,
59     SinkStream* reformed_element) {
60   return patcher_->Reform(transformed_element, reformed_element);
61 }
62
63 // Makes a TransformationPatchGenerator of the appropriate variety for the
64 // Element kind.
65 TransformationPatchGenerator* MakeGenerator(Element* old_element,
66                                             Element* new_element) {
67   switch (new_element->kind()) {
68     case EXE_UNKNOWN:
69       break;
70     case EXE_WIN_32_X86: {
71       TransformationPatchGenerator* generator =
72           new PatchGeneratorX86_32(
73               old_element,
74               new_element,
75               new PatcherX86_32(old_element->region()),
76               EXE_WIN_32_X86);
77       return generator;
78     }
79     case EXE_ELF_32_X86: {
80       TransformationPatchGenerator* generator =
81           new PatchGeneratorX86_32(
82               old_element,
83               new_element,
84               new PatcherX86_32(old_element->region()),
85               EXE_ELF_32_X86);
86       return generator;
87     }
88     case EXE_ELF_32_ARM: {
89       TransformationPatchGenerator* generator =
90           new PatchGeneratorX86_32(
91               old_element,
92               new_element,
93               new PatcherX86_32(old_element->region()),
94               EXE_ELF_32_ARM);
95       return generator;
96     }
97     case EXE_WIN_32_X64: {
98       TransformationPatchGenerator* generator =
99           new PatchGeneratorX86_32(
100               old_element,
101               new_element,
102               new PatcherX86_32(old_element->region()),
103               EXE_WIN_32_X64);
104       return generator;
105     }
106   }
107
108   LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind();
109   return NULL;
110 }
111
112 // Checks to see if the proposed comparison is 'unsafe'.  Sometimes one element
113 // from 'old' is matched as the closest element to multiple elements from 'new'.
114 // Each time this happens, the old element is transformed and serialized.  This
115 // is a problem when the old element is huge compared with the new element
116 // because the mutliple serialized copies can be much bigger than the size of
117 // either ensemble.
118 //
119 // The right way to avoid this is to ensure any one element from 'old' is
120 // serialized once, which requires matching code in the patch application.
121 //
122 // This is a quick hack to avoid the problem by prohibiting a big difference in
123 // size between matching elements.
124 bool UnsafeDifference(Element* old_element, Element* new_element) {
125   double kMaxBloat = 2.0;
126   size_t kMinWorrysomeDifference = 2 << 20;  // 2MB
127   size_t old_size = old_element->region().length();
128   size_t new_size = new_element->region().length();
129   size_t low_size = std::min(old_size, new_size);
130   size_t high_size = std::max(old_size, new_size);
131   if (high_size - low_size < kMinWorrysomeDifference) return false;
132   if (high_size < low_size * kMaxBloat) return false;
133   return true;
134 }
135
136 // FindGenerators finds TransformationPatchGenerators for the elements of
137 // |new_ensemble|.  For each element of |new_ensemble| we find the closest
138 // matching element from |old_ensemble| and use that as the basis for
139 // differential compression.  The elements have to be the same kind so as to
140 // support transformation into the same kind of 'new representation'.
141 //
142 Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble,
143                       std::vector<TransformationPatchGenerator*>* generators) {
144   base::Time start_find_time = base::Time::Now();
145   old_ensemble->FindEmbeddedElements();
146   new_ensemble->FindEmbeddedElements();
147   VLOG(1) << "done FindEmbeddedElements "
148           << (base::Time::Now() - start_find_time).InSecondsF();
149
150   std::vector<Element*> old_elements(old_ensemble->elements());
151   std::vector<Element*> new_elements(new_ensemble->elements());
152
153   VLOG(1) << "old has " << old_elements.size() << " elements";
154   VLOG(1) << "new has " << new_elements.size() << " elements";
155
156   DifferenceEstimator difference_estimator;
157   std::vector<DifferenceEstimator::Base*> bases;
158
159   base::Time start_bases_time = base::Time::Now();
160   for (size_t i = 0;  i < old_elements.size();  ++i) {
161     bases.push_back(
162         difference_estimator.MakeBase(old_elements[i]->region()));
163   }
164   VLOG(1) << "done make bases "
165           << (base::Time::Now() - start_bases_time).InSecondsF() << "s";
166
167   for (size_t new_index = 0;  new_index < new_elements.size();  ++new_index) {
168     Element* new_element = new_elements[new_index];
169     DifferenceEstimator::Subject* new_subject =
170         difference_estimator.MakeSubject(new_element->region());
171
172     // Search through old elements to find the best match.
173     //
174     // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
175     // new_ensemble probably have a very similar structure.  We can make the
176     // search faster by making the comparison provided by DifferenceEstimator
177     // more nuanced, returning early if the measured difference is greater than
178     // the current best.  This will be most effective if we can arrange that the
179     // first elements we try to match are likely the 'right' ones.  We could
180     // prioritize elements that are of a similar size or similar position in the
181     // sequence of elements.
182     //
183     Element* best_old_element = NULL;
184     size_t best_difference = std::numeric_limits<size_t>::max();
185     for (size_t old_index = 0;  old_index < old_elements.size();  ++old_index) {
186       Element* old_element = old_elements[old_index];
187       // Elements of different kinds are incompatible.
188       if (old_element->kind() != new_element->kind())
189         continue;
190
191       if (UnsafeDifference(old_element, new_element))
192         continue;
193
194       base::Time start_compare = base::Time::Now();
195       DifferenceEstimator::Base* old_base = bases[old_index];
196       size_t difference = difference_estimator.Measure(old_base, new_subject);
197
198       VLOG(1) << "Compare " << old_element->Name()
199               << " to " << new_element->Name()
200               << " --> " << difference
201               << " in " << (base::Time::Now() - start_compare).InSecondsF()
202               << "s";
203       if (difference == 0) {
204         VLOG(1) << "Skip " << new_element->Name()
205                 << " - identical to " << old_element->Name();
206         best_difference = 0;
207         best_old_element = NULL;
208         break;
209       }
210       if (difference < best_difference) {
211         best_difference = difference;
212         best_old_element = old_element;
213       }
214     }
215
216     if (best_old_element) {
217       VLOG(1) << "Matched " << best_old_element->Name()
218               << " to " << new_element->Name()
219               << " --> " << best_difference;
220       TransformationPatchGenerator* generator =
221           MakeGenerator(best_old_element, new_element);
222       if (generator)
223         generators->push_back(generator);
224     }
225   }
226
227   VLOG(1) << "done FindGenerators found " << generators->size()
228           << " in " << (base::Time::Now() - start_find_time).InSecondsF()
229           << "s";
230
231   return C_OK;
232 }
233
234 void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) {
235   for (size_t i = 0;  i < generators->size();  ++i) {
236     delete (*generators)[i];
237   }
238   generators->clear();
239 }
240
241 ////////////////////////////////////////////////////////////////////////////////
242
243 Status GenerateEnsemblePatch(SourceStream* base,
244                              SourceStream* update,
245                              SinkStream* final_patch) {
246   VLOG(1) << "start GenerateEnsemblePatch";
247   base::Time start_time = base::Time::Now();
248
249   Region old_region(base->Buffer(), base->Remaining());
250   Region new_region(update->Buffer(), update->Remaining());
251   Ensemble old_ensemble(old_region, "old");
252   Ensemble new_ensemble(new_region, "new");
253   std::vector<TransformationPatchGenerator*> generators;
254   Status generators_status = FindGenerators(&old_ensemble, &new_ensemble,
255                                             &generators);
256   if (generators_status != C_OK)
257     return generators_status;
258
259   SinkStreamSet patch_streams;
260
261   SinkStream* tranformation_descriptions      = patch_streams.stream(0);
262   SinkStream* parameter_correction            = patch_streams.stream(1);
263   SinkStream* transformed_elements_correction = patch_streams.stream(2);
264   SinkStream* ensemble_correction             = patch_streams.stream(3);
265
266   size_t number_of_transformations = generators.size();
267   if (!tranformation_descriptions->WriteSizeVarint32(number_of_transformations))
268     return C_STREAM_ERROR;
269
270   for (size_t i = 0;  i < number_of_transformations;  ++i) {
271     ExecutableType kind = generators[i]->Kind();
272     if (!tranformation_descriptions->WriteVarint32(kind))
273       return C_STREAM_ERROR;
274   }
275
276   for (size_t i = 0;  i < number_of_transformations;  ++i) {
277     Status status =
278         generators[i]->WriteInitialParameters(tranformation_descriptions);
279     if (status != C_OK)
280       return status;
281   }
282
283   //
284   // Generate sub-patch for parameters.
285   //
286   SinkStreamSet predicted_parameters_sink;
287   SinkStreamSet corrected_parameters_sink;
288
289   for (size_t i = 0;  i < number_of_transformations;  ++i) {
290     SinkStreamSet single_predicted_parameters;
291     Status status;
292     status = generators[i]->PredictTransformParameters(
293         &single_predicted_parameters);
294     if (status != C_OK)
295       return status;
296     if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters))
297       return C_STREAM_ERROR;
298
299     SinkStreamSet single_corrected_parameters;
300     status = generators[i]->CorrectedTransformParameters(
301         &single_corrected_parameters);
302     if (status != C_OK)
303       return status;
304     if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters))
305       return C_STREAM_ERROR;
306   }
307
308   SinkStream linearized_predicted_parameters;
309   SinkStream linearized_corrected_parameters;
310
311   if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters))
312     return C_STREAM_ERROR;
313   if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters))
314     return C_STREAM_ERROR;
315
316   SourceStream predicted_parameters_source;
317   SourceStream corrected_parameters_source;
318   predicted_parameters_source.Init(linearized_predicted_parameters);
319   corrected_parameters_source.Init(linearized_corrected_parameters);
320
321   Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source,
322                                              &corrected_parameters_source,
323                                              parameter_correction);
324   if (delta1_status != C_OK)
325     return delta1_status;
326
327   //
328   // Generate sub-patch for elements.
329   //
330   corrected_parameters_source.Init(linearized_corrected_parameters);
331   SourceStreamSet corrected_parameters_source_set;
332   if (!corrected_parameters_source_set.Init(&corrected_parameters_source))
333     return C_STREAM_ERROR;
334
335   SinkStreamSet predicted_transformed_elements;
336   SinkStreamSet corrected_transformed_elements;
337
338   for (size_t i = 0;  i < number_of_transformations;  ++i) {
339     SourceStreamSet single_parameters;
340     if (!corrected_parameters_source_set.ReadSet(&single_parameters))
341       return C_STREAM_ERROR;
342     SinkStreamSet single_predicted_transformed_element;
343     SinkStreamSet single_corrected_transformed_element;
344     Status status = generators[i]->Transform(
345         &single_parameters,
346         &single_predicted_transformed_element,
347         &single_corrected_transformed_element);
348     if (status != C_OK)
349       return status;
350     if (!single_parameters.Empty())
351       return C_STREAM_NOT_CONSUMED;
352     if (!predicted_transformed_elements.WriteSet(
353             &single_predicted_transformed_element))
354       return C_STREAM_ERROR;
355     if (!corrected_transformed_elements.WriteSet(
356             &single_corrected_transformed_element))
357       return C_STREAM_ERROR;
358   }
359
360   if (!corrected_parameters_source_set.Empty())
361     return C_STREAM_NOT_CONSUMED;
362
363   SinkStream linearized_predicted_transformed_elements;
364   SinkStream linearized_corrected_transformed_elements;
365
366   if (!predicted_transformed_elements.CopyTo(
367           &linearized_predicted_transformed_elements))
368     return C_STREAM_ERROR;
369   if (!corrected_transformed_elements.CopyTo(
370           &linearized_corrected_transformed_elements))
371     return C_STREAM_ERROR;
372
373   SourceStream predicted_transformed_elements_source;
374   SourceStream corrected_transformed_elements_source;
375   predicted_transformed_elements_source
376       .Init(linearized_predicted_transformed_elements);
377   corrected_transformed_elements_source
378       .Init(linearized_corrected_transformed_elements);
379
380   Status delta2_status =
381       GenerateSimpleDelta(&predicted_transformed_elements_source,
382                           &corrected_transformed_elements_source,
383                           transformed_elements_correction);
384   if (delta2_status != C_OK)
385     return delta2_status;
386
387   // Last use, free storage.
388   linearized_predicted_transformed_elements.Retire();
389
390   //
391   // Generate sub-patch for whole enchilada.
392   //
393   SinkStream predicted_ensemble;
394
395   if (!predicted_ensemble.Write(base->Buffer(), base->Remaining()))
396     return C_STREAM_ERROR;
397
398   SourceStreamSet corrected_transformed_elements_source_set;
399   corrected_transformed_elements_source
400       .Init(linearized_corrected_transformed_elements);
401   if (!corrected_transformed_elements_source_set
402       .Init(&corrected_transformed_elements_source))
403     return C_STREAM_ERROR;
404
405   for (size_t i = 0;  i < number_of_transformations;  ++i) {
406     SourceStreamSet single_corrected_transformed_element;
407     if (!corrected_transformed_elements_source_set.ReadSet(
408             &single_corrected_transformed_element))
409       return C_STREAM_ERROR;
410     Status status = generators[i]->Reform(&single_corrected_transformed_element,
411                                           &predicted_ensemble);
412     if (status != C_OK)
413       return status;
414     if (!single_corrected_transformed_element.Empty())
415       return C_STREAM_NOT_CONSUMED;
416   }
417
418   if (!corrected_transformed_elements_source_set.Empty())
419     return C_STREAM_NOT_CONSUMED;
420
421   // No more references to this stream's buffer.
422   linearized_corrected_transformed_elements.Retire();
423
424   FreeGenerators(&generators);
425
426   size_t final_patch_input_size = predicted_ensemble.Length();
427   SourceStream predicted_ensemble_source;
428   predicted_ensemble_source.Init(predicted_ensemble);
429   Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
430                                              update,
431                                              ensemble_correction);
432   if (delta3_status != C_OK)
433     return delta3_status;
434
435   //
436   // Final output stream has a header followed by a StreamSet.
437   //
438   if (!final_patch->WriteVarint32(CourgettePatchFile::kMagic) ||
439       !final_patch->WriteVarint32(CourgettePatchFile::kVersion) ||
440       !final_patch->WriteVarint32(CalculateCrc(old_region.start(),
441                                                old_region.length())) ||
442       !final_patch->WriteVarint32(CalculateCrc(new_region.start(),
443                                                new_region.length())) ||
444       !final_patch->WriteSizeVarint32(final_patch_input_size) ||
445       !patch_streams.CopyTo(final_patch)) {
446     return C_STREAM_ERROR;
447   }
448
449   VLOG(1) << "done GenerateEnsemblePatch "
450           << (base::Time::Now() - start_time).InSecondsF() << "s";
451
452   return C_OK;
453 }
454
455 }  // namespace