1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/adjustment_method.h"
17 #include "base/logging.h"
18 #include "base/memory/raw_ptr.h"
19 #include "base/memory/raw_ref.h"
20 #include "base/strings/string_number_conversions.h"
21 #include "base/strings/stringprintf.h"
22 #include "courgette/assembly_program.h"
23 #include "courgette/courgette.h"
24 #include "courgette/encoded_program.h"
28 ////////////////////////////////////////////////////////////////////////////////
30 class NullAdjustmentMethod : public AdjustmentMethod {
31 bool Adjust(const AssemblyProgram& model, AssemblyProgram* program) {
36 ////////////////////////////////////////////////////////////////////////////////
38 // The purpose of adjustment is to assign indexes to Labels of a program 'p' to
39 // make the sequence of indexes similar to a 'model' program 'm'. Labels
40 // themselves don't have enough information to do this job, so we work with a
41 // LabelInfo surrogate for each label.
45 raw_ptr<Label> label_; // The label that this info a surrogate for.
47 // Information used only in debugging messages.
48 uint32_t is_model_ : 1; // Is the label in the model?
49 uint32_t debug_index_ : 31; // An unique small number for naming the label.
51 uint32_t refs_; // Number of times this Label is referenced.
54 assignment_; // Label from other program corresponding to this.
56 // LabelInfos are in a doubly linked list ordered by address (label_->rva_) so
57 // we can quickly find Labels adjacent in address order.
58 raw_ptr<LabelInfo> next_addr_; // Label(Info) at next highest address.
59 raw_ptr<LabelInfo> prev_addr_; // Label(Info) at next lowest address.
61 std::vector<uint32_t> positions_; // Offsets into the trace of references.
63 // Just a no-argument constructor and copy constructor. Actual LabelInfo
64 // objects are allocated in std::pair structs in a std::map.
72 prev_addr_(nullptr) {}
75 void operator=(const LabelInfo*); // Disallow assignment only.
  // Public compiler generated copy constructor is needed to construct
  // std::pair<Label*, LabelInfo> so that fresh LabelInfos can be allocated
82 struct OrderLabelInfoByAddressAscending {
83 bool operator()(const LabelInfo* a, const LabelInfo* b) const {
84 return a->label_->rva_ < b->label_->rva_;
88 static std::string ToString(LabelInfo* info) {
90 base::StringAppendF(&s, "%c%d", "pm"[info->is_model_], info->debug_index_);
91 if (info->label_->index_ != Label::kNoIndex)
92 base::StringAppendF(&s, " (%d)", info->label_->index_);
94 base::StringAppendF(&s, " #%u", info->refs_);
98 // General graph matching is exponential, essentially trying all permutations.
99 // The exponential algorithm can be made faster by avoiding consideration of
100 // impossible or unlikely matches. We can make the matching practical by eager
// matching - by looking for likely matches and committing to them, and using the
102 // committed assignment as the basis for further matching.
104 // The basic eager graph-matching assignment is based on several ideas:
106 // * The strongest match will be for parts of the program that have not
107 // changed. If part of a program has not changed, then the number of
108 // references to a label will be the same, and corresponding pairs of
109 // adjacent labels will have the same RVA difference.
111 // * Some assignments are 'obvious' if you look at the distribution. Example:
112 // if both the program and the model have a label that is referred to much
//   more often than the next most referred-to label, it is likely the two
114 // labels correspond.
116 // * If a label from the program corresponds to a label in the model, it is
117 // likely that the labels near the corresponding labels also match. A
118 // conservative way of extending the match is to assign only those labels
119 // which have exactly the same address offset and reference count.
121 // * If two labels correspond, then we can try to match up the references
122 // before and after the labels in the reference stream. For this to be
123 // practical, the number of references has to be small, e.g. each label has
124 // exactly one reference.
127 // Note: we also tried a completely different approach: random assignment
128 // followed by simulated annealing. This produced similar results. The results
129 // were not as good for very small differences because the simulated annealing
130 // never quite hit the groove. And simulated annealing was several orders of
134 // TRIE node for suffix strings in the label reference sequence.
136 // We dynamically build a trie for both the program and model, growing the trie
137 // as necessary. The trie node for a (possibly) empty string of label
138 // references contains the distribution of labels following the string. The
139 // roots node (for the empty string) thus contains the simple distribution of
140 // labels within the label reference stream.
143 Node(LabelInfo* in_edge, Node* prev)
144 : in_edge_(in_edge), prev_(prev), count_(0),
146 length_ = 1 + (prev_ ? prev_->length_ : 0);
148 raw_ptr<LabelInfo> in_edge_; //
149 raw_ptr<Node> prev_; // Node at shorter length.
150 int count_; // Frequency of this path in Trie.
152 typedef std::map<LabelInfo*, Node*> Edges;
154 std::vector<int> places_; // Indexes into sequence of this item.
155 std::list<Node*> edges_in_frequency_order;
158 bool Extended() const { return !edges_.empty(); }
160 uint32_t Weight() const { return edges_in_frequency_order.front()->count_; }
163 static std::string ToString(Node* node) {
164 std::vector<std::string> prefix;
165 for (Node* n = node; n->prev_; n = n->prev_)
166 prefix.push_back(ToString(n->in_edge_));
170 const char* sep = "";
171 while (!prefix.empty()) {
178 s += base::StringPrintf("%u", node->count_);
180 s += base::NumberToString(node->edges_in_frequency_order.size());
185 typedef std::vector<LabelInfo*> Trace;
187 struct OrderNodeByCountDecreasing {
188 bool operator()(Node* a, Node* b) const {
189 if (a->count_ != b->count_)
190 return (a->count_) > (b->count_);
191 return a->places_.at(0) < b->places_.at(0); // Prefer first occuring.
195 struct OrderNodeByWeightDecreasing {
196 bool operator()(Node* a, Node* b) const {
197 // (Maybe tie-break on total count, followed by lowest assigned node indexes
199 uint32_t a_weight = a->Weight();
200 uint32_t b_weight = b->Weight();
201 if (a_weight != b_weight)
202 return a_weight > b_weight;
203 if (a->length_ != b->length_)
204 return a->length_ > b->length_; // Prefer longer.
205 return a->places_.at(0) < b->places_.at(0); // Prefer first occuring.
209 typedef std::set<Node*, OrderNodeByWeightDecreasing> NodeQueue;
211 class AssignmentProblem {
213 AssignmentProblem(const Trace& model, const Trace& problem)
219 AssignmentProblem(const AssignmentProblem&) = delete;
220 AssignmentProblem& operator=(const AssignmentProblem&) = delete;
222 ~AssignmentProblem() {
223 for (size_t i = 0; i < all_nodes_.size(); ++i)
224 delete all_nodes_[i];
228 m_root_ = MakeRootNode(*m_trace_);
229 p_root_ = MakeRootNode(*p_trace_);
232 while (!worklist_.empty()) {
233 Node* node = *worklist_.begin();
234 node->in_queue_ = false;
235 worklist_.erase(node);
239 VLOG(2) << unsolved_.size() << " unsolved items";
244 void AddToQueue(Node* node) {
245 if (node->length_ >= 10) {
246 VLOG(4) << "Length clipped " << ToString(node->prev_);
249 if (node->in_queue_) {
250 LOG(ERROR) << "Double add " << ToString(node);
253 // just to be sure data for prioritizing is available
254 ExtendNode(node, *p_trace_);
255 // SkipCommittedLabels(node);
256 if (node->edges_in_frequency_order.empty())
258 node->in_queue_ = true;
259 worklist_.insert(node);
262 void SkipCommittedLabels(Node* node) {
263 ExtendNode(node, *p_trace_);
264 uint32_t skipped = 0;
265 while (!node->edges_in_frequency_order.empty() &&
266 node->edges_in_frequency_order.front()->in_edge_->assignment_) {
268 node->edges_in_frequency_order.pop_front();
271 VLOG(4) << "Skipped " << skipped << " at " << ToString(node);
274 void TrySolveNode(Node* p_node) {
275 Node* front = p_node->edges_in_frequency_order.front();
276 if (front->in_edge_->assignment_) {
277 p_node->edges_in_frequency_order.pop_front();
283 // Compare frequencies of unassigned edges, and either make
284 // assignment(s) or move node to unsolved list
286 Node* m_node = FindModelNode(p_node);
288 if (m_node == nullptr) {
289 VLOG(2) << "Can't find model node";
290 unsolved_.insert(p_node);
293 ExtendNode(m_node, *m_trace_);
    // Let's just try greedy
297 SkipCommittedLabels(m_node);
298 if (m_node->edges_in_frequency_order.empty()) {
299 VLOG(4) << "Punting, no elements left in model vs "
300 << p_node->edges_in_frequency_order.size();
301 unsolved_.insert(p_node);
304 Node* m_match = m_node->edges_in_frequency_order.front();
305 Node* p_match = p_node->edges_in_frequency_order.front();
307 if (p_match->count_ > 1.1 * m_match->count_ ||
308 m_match->count_ > 1.1 * p_match->count_) {
309 VLOG(3) << "Tricky distribution "
310 << p_match->count_ << ":" << m_match->count_ << " "
311 << ToString(p_match) << " vs " << ToString(m_match);
315 m_node->edges_in_frequency_order.pop_front();
316 p_node->edges_in_frequency_order.pop_front();
318 LabelInfo* p_label_info = p_match->in_edge_;
319 LabelInfo* m_label_info = m_match->in_edge_;
320 int m_index = p_label_info->label_->index_;
321 if (m_index != Label::kNoIndex) {
322 VLOG(2) << "Cant use unassigned label from model " << m_index;
323 unsolved_.insert(p_node);
327 Assign(p_label_info, m_label_info);
329 AddToQueue(p_match); // find matches within new match
330 AddToQueue(p_node); // and more matches within this node
333 void Assign(LabelInfo* p_info, LabelInfo* m_info) {
334 AssignOne(p_info, m_info);
335 VLOG(4) << "Assign " << ToString(p_info) << " := " << ToString(m_info);
336 // Now consider unassigned adjacent addresses
337 TryExtendAssignment(p_info, m_info);
340 void AssignOne(LabelInfo* p_info, LabelInfo* m_info) {
341 p_info->label_->index_ = m_info->label_->index_;
344 m_info->assignment_ = p_info;
345 p_info->assignment_ = m_info;
348 void TryExtendAssignment(LabelInfo* p_info, LabelInfo* m_info) {
349 RVA m_rva_base = m_info->label_->rva_;
350 RVA p_rva_base = p_info->label_->rva_;
352 LabelInfo* m_info_next = m_info->next_addr_;
353 LabelInfo* p_info_next = p_info->next_addr_;
354 for ( ; m_info_next && p_info_next; ) {
355 if (m_info_next->assignment_)
358 RVA m_rva = m_info_next->label_->rva_;
359 RVA p_rva = p_info_next->label_->rva_;
361 if (m_rva - m_rva_base != p_rva - p_rva_base) {
362 // previous label was pointing to something that is different size
365 LabelInfo* m_info_next_next = m_info_next->next_addr_;
366 LabelInfo* p_info_next_next = p_info_next->next_addr_;
367 if (m_info_next_next && p_info_next_next) {
368 RVA m_rva_next = m_info_next_next->label_->rva_;
369 RVA p_rva_next = p_info_next_next->label_->rva_;
370 if (m_rva_next - m_rva != p_rva_next - p_rva) {
371 // Since following labels are no longer in address lockstep, assume
372 // this address has a difference.
377 // The label has inconsistent numbers of references, it is probably not
379 if (m_info_next->refs_ != p_info_next->refs_) {
383 VLOG(4) << " Extending assignment -> "
384 << ToString(p_info_next) << " := " << ToString(m_info_next);
386 AssignOne(p_info_next, m_info_next);
388 if (p_info_next->refs_ == m_info_next->refs_ &&
389 p_info_next->refs_ == 1) {
390 TryExtendSequence(p_info_next->positions_[0],
391 m_info_next->positions_[0]);
392 TryExtendSequenceBackwards(p_info_next->positions_[0],
393 m_info_next->positions_[0]);
396 p_info_next = p_info_next_next;
397 m_info_next = m_info_next_next;
400 LabelInfo* m_info_prev = m_info->prev_addr_;
401 LabelInfo* p_info_prev = p_info->prev_addr_;
402 for ( ; m_info_prev && p_info_prev; ) {
403 if (m_info_prev->assignment_)
406 RVA m_rva = m_info_prev->label_->rva_;
407 RVA p_rva = p_info_prev->label_->rva_;
409 if (m_rva - m_rva_base != p_rva - p_rva_base) {
410 // previous label was pointing to something that is different size
413 LabelInfo* m_info_prev_prev = m_info_prev->prev_addr_;
414 LabelInfo* p_info_prev_prev = p_info_prev->prev_addr_;
      // If the label has inconsistent numbers of references, it is
      // probably not the same thing
418 if (m_info_prev->refs_ != p_info_prev->refs_) {
422 AssignOne(p_info_prev, m_info_prev);
423 VLOG(4) << " Extending assignment <- " << ToString(p_info_prev) << " := "
424 << ToString(m_info_prev);
426 p_info_prev = p_info_prev_prev;
427 m_info_prev = m_info_prev_prev;
431 uint32_t TryExtendSequence(uint32_t p_pos_start, uint32_t m_pos_start) {
432 uint32_t p_pos = p_pos_start + 1;
433 uint32_t m_pos = m_pos_start + 1;
435 while (p_pos < p_trace_->size() && m_pos < m_trace_->size()) {
436 LabelInfo* p_info = (*p_trace_)[p_pos];
437 LabelInfo* m_info = (*m_trace_)[m_pos];
439 // To match, either (1) both are assigned or (2) both are unassigned.
440 if ((p_info->assignment_ == nullptr) != (m_info->assignment_ == nullptr))
443 // If they are assigned, it needs to be consistent (same index).
444 if (p_info->assignment_ && m_info->assignment_) {
445 if (p_info->label_->index_ != m_info->label_->index_)
452 if (p_info->refs_ != m_info->refs_)
455 AssignOne(p_info, m_info);
456 VLOG(4) << " Extending assignment seq[+" << p_pos - p_pos_start
457 << "] -> " << ToString(p_info) << " := " << ToString(m_info);
463 return p_pos - p_pos_start;
466 uint32_t TryExtendSequenceBackwards(uint32_t p_pos_start,
467 uint32_t m_pos_start) {
468 if (p_pos_start == 0 || m_pos_start == 0)
471 uint32_t p_pos = p_pos_start - 1;
472 uint32_t m_pos = m_pos_start - 1;
474 while (p_pos > 0 && m_pos > 0) {
475 LabelInfo* p_info = (*p_trace_)[p_pos];
476 LabelInfo* m_info = (*m_trace_)[m_pos];
478 if ((p_info->assignment_ == nullptr) != (m_info->assignment_ == nullptr))
481 if (p_info->assignment_ && m_info->assignment_) {
482 if (p_info->label_->index_ != m_info->label_->index_)
489 if (p_info->refs_ != m_info->refs_)
492 AssignOne(p_info, m_info);
493 VLOG(4) << " Extending assignment seq[-" << p_pos_start - p_pos
494 << "] <- " << ToString(p_info) << " := " << ToString(m_info);
500 return p_pos - p_pos_start;
503 Node* FindModelNode(Node* node) {
504 if (node->prev_ == nullptr)
507 Node* m_parent = FindModelNode(node->prev_);
508 if (m_parent == nullptr) {
512 ExtendNode(m_parent, *m_trace_);
514 LabelInfo* p_label = node->in_edge_;
515 LabelInfo* m_label = p_label->assignment_;
516 if (m_label == nullptr) {
517 VLOG(2) << "Expected assigned prefix";
521 Node::Edges::iterator e = m_parent->edges_.find(m_label);
522 if (e == m_parent->edges_.end()) {
523 VLOG(3) << "Expected defined edge in parent";
530 Node* MakeRootNode(const Trace& trace) {
531 Node* node = new Node(nullptr, nullptr);
532 all_nodes_.push_back(node);
533 for (uint32_t i = 0; i < trace.size(); ++i) {
535 node->places_.push_back(i);
540 void ExtendNode(Node* node, const Trace& trace) {
541 // Make sure trie is filled in at this node.
542 if (node->Extended())
544 for (size_t i = 0; i < node->places_.size(); ++i) {
545 uint32_t index = node->places_.at(i);
546 if (index < trace.size()) {
547 LabelInfo* item = trace.at(index);
548 Node*& slot = node->edges_[item];
549 if (slot == nullptr) {
550 slot = new Node(item, node);
551 all_nodes_.push_back(slot);
552 node->edges_in_frequency_order.push_back(slot);
554 slot->places_.push_back(index + 1);
558 node->edges_in_frequency_order.sort(OrderNodeByCountDecreasing());
561 const raw_ref<const Trace> m_trace_;
562 const raw_ref<const Trace> p_trace_;
563 raw_ptr<Node> m_root_;
564 raw_ptr<Node> p_root_;
569 std::vector<Node*> all_nodes_;
572 class GraphAdjuster : public AdjustmentMethod {
575 : prog_(nullptr), model_(nullptr), debug_label_index_gen_(0) {}
577 GraphAdjuster(const GraphAdjuster&) = delete;
578 GraphAdjuster& operator=(const GraphAdjuster&) = delete;
580 ~GraphAdjuster() = default;
582 bool Adjust(const AssemblyProgram& model, AssemblyProgram* program) {
583 VLOG(1) << "GraphAdjuster::Adjust";
586 debug_label_index_gen_ = 0;
591 prog_->UnassignIndexes();
592 CollectTraces(model_, &model_abs32_, &model_rel32_, true);
593 CollectTraces(prog_, &prog_abs32_, &prog_rel32_, false);
594 Solve(model_abs32_, prog_abs32_);
595 Solve(model_rel32_, prog_rel32_);
596 prog_->AssignRemainingIndexes();
601 void CollectTraces(const AssemblyProgram* program, Trace* abs32, Trace* rel32,
603 for (Label* label : program->abs32_label_annotations())
604 ReferenceLabel(abs32, is_model, label);
605 for (Label* label : program->rel32_label_annotations())
606 ReferenceLabel(rel32, is_model, label);
608 // TODO(sra): we could simply append all the labels in index order to
609 // incorporate some costing for entropy (bigger deltas) that will be
610 // introduced into the label address table by non-monotonic ordering. This
611 // would have some knock-on effects to parts of the algorithm that work on
612 // single-occurrence labels.
615 void Solve(const Trace& model, const Trace& problem) {
616 LinkLabelInfos(model);
617 LinkLabelInfos(problem);
618 AssignmentProblem a(model, problem);
622 void LinkLabelInfos(const Trace& trace) {
623 typedef std::set<LabelInfo*, OrderLabelInfoByAddressAscending> Ordered;
625 for (Trace::const_iterator p = trace.begin(); p != trace.end(); ++p)
627 LabelInfo* prev = nullptr;
628 for (Ordered::iterator p = ordered.begin(); p != ordered.end(); ++p) {
629 LabelInfo* curr = *p;
630 if (prev) prev->next_addr_ = curr;
631 curr->prev_addr_ = prev;
634 if (curr->positions_.size() != curr->refs_)
639 void ReferenceLabel(Trace* trace, bool is_model, Label* label) {
641 MakeLabelInfo(label, is_model, static_cast<uint32_t>(trace->size())));
644 LabelInfo* MakeLabelInfo(Label* label, bool is_model, uint32_t position) {
645 LabelInfo& slot = label_infos_[label];
646 if (slot.label_ == nullptr) {
648 slot.is_model_ = is_model;
649 slot.debug_index_ = ++debug_label_index_gen_;
651 slot.positions_.push_back(position);
656 raw_ptr<AssemblyProgram> prog_; // Program to be adjusted, owned by caller.
657 raw_ptr<const AssemblyProgram>
658 model_; // Program to be mimicked, owned by caller.
665 int debug_label_index_gen_;
667 // Note LabelInfo is allocated inside map, so the LabelInfo lifetimes are
668 // managed by the map.
669 std::map<Label*, LabelInfo> label_infos_;
673 ////////////////////////////////////////////////////////////////////////////////
675 void AdjustmentMethod::Destroy() { delete this; }
677 AdjustmentMethod* AdjustmentMethod::MakeNullAdjustmentMethod() {
678 return new NullAdjustmentMethod();
681 AdjustmentMethod* AdjustmentMethod::MakeTrieAdjustmentMethod() {
682 return new GraphAdjuster();
685 Status Adjust(const AssemblyProgram& model, AssemblyProgram* program) {
686 AdjustmentMethod* method = AdjustmentMethod::MakeProductionAdjustmentMethod();
687 bool ok = method->Adjust(model, program);
692 return C_ADJUSTMENT_FAILED;
695 } // namespace courgette