1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "sandbox/linux/seccomp-bpf/codegen.h"
7 #include <linux/filter.h>
11 #include "base/logging.h"
12 #include "sandbox/linux/seccomp-bpf/basicblock.h"
13 #include "sandbox/linux/seccomp-bpf/die.h"
14 #include "sandbox/linux/seccomp-bpf/instruction.h"
// Constructor. "compiled_" starts out false; Compile() checks it so that a
// CodeGen instance can only be used for a single compilation.
18 CodeGen::CodeGen() : compiled_(false) {}
21 for (Instructions::iterator iter = instructions_.begin();
22 iter != instructions_.end();
26 for (BasicBlocks::iterator iter = basic_blocks_.begin();
27 iter != basic_blocks_.end();
// Factory for non-conditional instructions: plain (non-jump) opcodes,
// BPF_RET, and unconditional BPF_JA jumps. Every Instruction is recorded in
// "instructions_" so this CodeGen instance retains ownership.
// NOTE(review): this listing elides several original lines (the remaining
// parameters and some error-handling/return lines); comments below only
// describe what is visible.
33 Instruction* CodeGen::MakeInstruction(uint16_t code,
36   // We can handle non-jumping instructions and "always" jumps. Both of
37   // them are followed by exactly one "next" instruction.
38   // We allow callers to defer specifying "next", but then they must call
39   // "joinInstructions" later.
// Conditional jumps need two targets and must use the four-argument
// overload below.
40   if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
42                 "Must provide both \"true\" and \"false\" branch "
// A BPF_RET terminates execution, so chaining a successor is an error.
45   if (next && BPF_CLASS(code) == BPF_RET) {
46     SANDBOX_DIE("Cannot append instructions after a return statement");
48   if (BPF_CLASS(code) == BPF_JMP) {
49     // "Always" jumps use the "true" branch target, only.
50     Instruction* insn = new Instruction(code, 0, next, NULL);
51     instructions_.push_back(insn);
54     // Non-jumping instructions do not use any of the branch targets.
55     Instruction* insn = new Instruction(code, k, next);
56     instructions_.push_back(insn);
// Factory for conditional jumps: requires a BPF_JMP opcode other than
// BPF_JA, plus both a "true" (jt) and "false" (jf) branch target. The new
// Instruction is recorded in "instructions_" for later cleanup.
// NOTE(review): the listing elides the remaining parameter lines and the
// branch-target validity check preceding the SANDBOX_DIE at original
// line 71; confirm against the full source.
61 Instruction* CodeGen::MakeInstruction(uint16_t code,
65   // We can handle all conditional jumps. They are followed by both a
66   // "true" and a "false" branch.
67   if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
68     SANDBOX_DIE("Expected a BPF_JMP instruction");
71     SANDBOX_DIE("Branches must jump to a valid instruction");
73   Instruction* insn = new Instruction(code, k, jt, jf);
74   instructions_.push_back(insn);
// Depth-first traversal of the instruction graph. For every jump target it
// increments the incoming-edge count in "branch_targets"; these counts later
// determine where basic blocks must begin. Uses an explicit stack of
// deferred "false" branches so the "true" branch is always walked first.
78 void CodeGen::FindBranchTargets(const Instruction& instructions,
79                                 BranchTargets* branch_targets) {
80   // Follow all possible paths through the "instructions" graph and compute
81   // a list of branch targets. This will later be needed to compute the
82   // boundaries of basic blocks.
83   // We maintain a set of all instructions that we have previously seen. This
84   // set ultimately converges on all instructions in the program.
85   std::set<const Instruction*> seen_instructions;
// NOTE(review): the declaration of "stack" (original line 86) is elided in
// this listing; it is used below as a vector of deferred branches.
87   for (const Instruction* insn = &instructions; insn;) {
88     seen_instructions.insert(insn);
89     if (BPF_CLASS(insn->code) == BPF_JMP) {
90       // Found a jump. Increase count of incoming edges for each of the jump
// Every jump has a "true" target; conditional jumps also have a "false"
// target, which we remember on the stack for later traversal.
92       ++(*branch_targets)[insn->jt_ptr];
93       if (BPF_OP(insn->code) != BPF_JA) {
94         ++(*branch_targets)[insn->jf_ptr];
95         stack.push_back(const_cast<Instruction*>(insn));
97       // Start a recursive descent for depth-first traversal.
98       if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
99         // We haven't seen the "true" branch yet. Traverse it now. We have
100         // already remembered the "false" branch on the stack and will
101         // traverse it later.
105         // Now try traversing the "false" branch.
109       // This is a non-jump instruction, just continue to the next instruction
110       // (if any). It's OK if "insn" becomes NULL when reaching a return
// Consistency check: exactly the BPF_RET instructions may lack a "next".
112       if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
114                    "Internal compiler error; return instruction must be at "
115                    "the end of the BPF program");
117       if (seen_instructions.find(insn->next) == seen_instructions.end()) {
120         // We have seen this instruction before. That could happen if it is
121         // a branch target. No need to continue processing.
// Backtracking: pop deferred conditional jumps until we find one whose
// "false" branch has not been visited yet.
125     while (!insn && !stack.empty()) {
126       // We are done processing all the way to a leaf node, backtrack up the
127       // stack to any branches that we haven't processed yet. By definition,
128       // this has to be a "false" branch, as we always process the "true"
129       // branches right away.
132       if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
133         // We haven't seen the "false" branch yet. So, that's where we'll
137         // We have seen both the "true" and the "false" branch, continue
// Internal sanity check: by now, both targets must have been visited.
139         if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
141                      "Internal compiler error; cannot find all "
// Collects the straight-line instructions from "head" through "tail"
// (inclusive, following "next" pointers) into a freshly allocated
// BasicBlock, which is also registered in "basic_blocks_" so this CodeGen
// instance owns it. A jump anywhere before "tail" indicates a compiler bug.
// NOTE(review): the loop-exit test comparing "head" to "tail" (original
// lines 157-159) is elided in this listing.
151 BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
152   // Iterate over all the instructions between "head" and "tail" and
153   // insert them into a new basic block.
154   BasicBlock* bb = new BasicBlock;
155   for (;; head = head->next) {
156     bb->instructions.push_back(head);
// Jumps terminate a basic block, so only "tail" may be a jump.
160     if (BPF_CLASS(head->code) == BPF_JMP) {
161       SANDBOX_DIE("Found a jump inside of a basic block");
164   basic_blocks_.push_back(bb);
// Builds the basic block for [head, tail] and enters it into the
// "basic_blocks" map keyed by its first instruction. The first block ever
// added is also returned through "firstBlock". The consistency check below
// enforces that exactly one block — the very first one — is not a branch
// target.
168 void CodeGen::AddBasicBlock(Instruction* head,
170                             const BranchTargets& branch_targets,
171                             TargetsToBlocks* basic_blocks,
172                             BasicBlock** firstBlock) {
173   // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
174   // has not been set before.
175   BranchTargets::const_iterator iter = branch_targets.find(head);
// "head" is not a branch target  <=>  *firstBlock is still unset  <=>  the
// map is still empty. Any disagreement is an internal error.
176   if ((iter == branch_targets.end()) != !*firstBlock ||
177       !*firstBlock != basic_blocks->empty()) {
179                 "Only the very first basic block should have no "
182   BasicBlock* bb = MakeBasicBlock(head, tail);
// NOTE(review): the assignment of *firstBlock (original lines ~183-185) is
// elided in this listing; only the map insertion is visible.
186   (*basic_blocks)[head] = bb;
// Partitions the instruction graph into basic blocks: a new block starts at
// every branch target and ends at every jump or return. Fills the
// "basic_blocks" map (first instruction -> block) and returns the entry
// block of the program.
190 BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
191     Instruction* instructions,
192     const BranchTargets& branch_targets,
193     TargetsToBlocks* basic_blocks) {
194   // Textbook implementation of a basic block generator. All basic blocks
195   // start with a branch target and end with either a return statement or
196   // a jump (or are followed by an instruction that forms the beginning of a
197   // new block). Both conditional and "always" jumps are supported.
198   BasicBlock* first_block = NULL;
199   std::set<const Instruction*> seen_instructions;
// NOTE(review): the declaration of "stack" (original line 200) is elided in
// this listing; it holds pending jump targets for the traversal below.
201   Instruction* tail = NULL;
202   Instruction* head = instructions;
203   for (Instruction* insn = head; insn;) {
204     if (seen_instructions.find(insn) != seen_instructions.end()) {
205       // We somehow went in a circle. This should never be possible. Not even
206       // cyclic graphs are supposed to confuse us this much.
207       SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
209     seen_instructions.insert(insn);
210     if (tail && branch_targets.find(insn) != branch_targets.end()) {
211       // We reached a branch target. Start a new basic block (this means,
212       // flushing the previous basic block first).
213       AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
216     if (BPF_CLASS(insn->code) == BPF_JMP) {
217       // We reached a jump instruction, this completes our current basic
218       // block. Flush it and continue by traversing both the true and the
219       // false branch of the jump. We need to maintain a stack to do so.
220       AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
// For conditional jumps, defer the "false" branch on the stack and follow
// the "true" branch first (the continuation is elided in this listing).
221       if (BPF_OP(insn->code) != BPF_JA) {
222         stack.push_back(insn->jf_ptr);
226       // If we are jumping to an instruction that we have previously
227       // processed, we are done with this branch. Continue by backtracking
229       while (seen_instructions.find(insn) != seen_instructions.end()) {
232           // We successfully traversed all reachable instructions.
235           // Going up the stack.
240       // Starting a new basic block.
244       // We found a non-jumping instruction, append it to current basic
249         // We reached a return statement, flush the current basic block and
250         // backtrack up the stack.
251         AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
259 // We define a comparator that inspects the sequence of instructions in our
260 // basic block and any blocks referenced by this block. This function can be
261 // used in a "less" comparator for the purpose of storing pointers to basic
262 // blocks in STL containers; this gives an easy option to use STL to find
263 // shared tail sequences of basic blocks.
// This is a recursive, lexicographic comparison: instruction-by-instruction
// on (code, k), then recursing into successor blocks via jt_ptr/jf_ptr (for
// jumps) or "next" (for fall-through). Well-formed BPF has no cycles, so
// recursion terminates (see the comment near the BPF_JA handling below).
264 static int PointerCompare(const BasicBlock* block1,
265                           const BasicBlock* block2,
266                           const TargetsToBlocks& blocks) {
267   // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
268   // If we are looking at the exact same block, this is trivial and we don't
269   // need to do a full comparison.
270   if (block1 == block2) {
274   // We compare the sequence of instructions in both basic blocks.
275   const Instructions& insns1 = block1->instructions;
276   const Instructions& insns2 = block2->instructions;
277   // Basic blocks should never be empty.
278   CHECK(!insns1.empty());
279   CHECK(!insns2.empty());
281   Instructions::const_iterator iter1 = insns1.begin();
282   Instructions::const_iterator iter2 = insns2.begin();
283   for (;; ++iter1, ++iter2) {
284     // If we have reached the end of the sequence of instructions in one or
285     // both basic blocks, we know the relative ordering between the two blocks
287     if (iter1 == insns1.end() || iter2 == insns2.end()) {
// A shorter block that is a prefix of a longer one orders by length;
// the exact return values are elided in this listing.
288       if (iter1 != insns1.end()) {
291       if (iter2 != insns2.end()) {
295       // If the two blocks are the same length (and have elementwise-equal code
296       // and k fields) and their last instructions are neither a JMP nor a RET
297       // (which is the only way we can reach this point), then we must compare
299       Instruction* const insns1_last = insns1.back();
300       Instruction* const insns2_last = insns2.back();
301       CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
302             BPF_CLASS(insns1_last->code) != BPF_RET);
304       // Non jumping instructions will always have a valid next instruction.
305       CHECK(insns1_last->next);
306       CHECK(insns2_last->next);
// Recurse into the fall-through successors to break the tie.
307       return PointerCompare(blocks.find(insns1_last->next)->second,
308                             blocks.find(insns2_last->next)->second,
312     // Compare the individual fields for both instructions.
313     const Instruction& insn1 = **iter1;
314     const Instruction& insn2 = **iter2;
315     if (insn1.code != insn2.code) {
316       return insn1.code - insn2.code;
318     if (insn1.k != insn2.k) {
319       return insn1.k - insn2.k;
322     // Sanity check: If we're looking at a JMP or RET instruction, by definition
323     // it should be the last instruction of the basic block.
324     if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
325       CHECK_EQ(insns1.back(), &insn1);
326       CHECK_EQ(insns2.back(), &insn2);
329     // RET instructions terminate execution, and only JMP instructions use the
330     // jt_ptr and jf_ptr fields. Anything else can continue to the next
331     // instruction in the basic block.
332     if (BPF_CLASS(insn1.code) == BPF_RET) {
334     } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
338     // Recursively compare the "true" and "false" branches.
339     // A well-formed BPF program can't have any cycles, so we know
340     // that our recursive algorithm will ultimately terminate.
341     // In the unlikely event that the programmer made a mistake and
342     // went out of the way to give us a cyclic program, we will crash
343     // with a stack overflow. We are OK with that.
// Conditional jumps: the "false" branch decides first; on a tie (c == 0,
// elided here), fall through to comparing the "true" branch.
344     if (BPF_OP(insn1.code) != BPF_JA) {
345       int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
346                              blocks.find(insn2.jf_ptr)->second,
352     return PointerCompare(blocks.find(insn1.jt_ptr)->second,
353                           blocks.find(insn2.jt_ptr)->second,
// Deduplicates basic blocks that compare equal under PointerCompare (i.e.
// identical instruction sequences including their successors): the first
// occurrence is kept in "seen_basic_blocks" and later duplicates are
// redirected to it by rewriting the "blocks" map entry.
358 void CodeGen::MergeTails(TargetsToBlocks* blocks) {
359   // We enter all of our basic blocks into a set using the BasicBlock::Less()
360   // comparator. This naturally results in blocks with identical tails of
361   // instructions to map to the same entry in the set. Whenever we discover
362   // that a particular chain of instructions is already in the set, we merge
363   // the basic blocks and update the pointer in the "blocks" map.
364   // Returns the number of unique basic blocks.
365   // N.B. We don't merge instructions on a granularity that is finer than
366   // a basic block. In practice, this is sufficiently rare that we don't
368   // Similarly, we currently don't merge anything other than tails. In
369   // the future, we might decide to revisit this decision and attempt to
370   // merge arbitrary sub-sequences of instructions.
371   BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
372   typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
373   Set seen_basic_blocks(less);
374   for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
376     BasicBlock* bb = iter->second;
377     Set::const_iterator entry = seen_basic_blocks.find(bb);
378     if (entry == seen_basic_blocks.end()) {
379       // This is the first time we see this particular sequence of
380       // instructions. Enter the basic block into the set of known
382       seen_basic_blocks.insert(bb);
384       // We have previously seen another basic block that defines the same
385       // sequence of instructions. Merge the two blocks and update the
386       // pointer in the "blocks" map.
// Redirect this map entry to the canonical (previously seen) block.
387       iter->second = *entry;
// Counts the number of incoming edges for every reachable basic block into
// "incoming_branches". Recurses into successors only on the first visit
// (count transitions 0 -> 1), so each edge is counted exactly once; these
// counts feed the topological sort in TopoSortBasicBlocks().
392 void CodeGen::ComputeIncomingBranches(BasicBlock* block,
393                                       const TargetsToBlocks& targets_to_blocks,
394                                       IncomingBranches* incoming_branches) {
395   // We increment the number of incoming branches each time we encounter a
396   // basic block. But we only traverse recursively the very first time we
397   // encounter a new block. This is necessary to make topological sorting
399   if (++(*incoming_branches)[block] == 1) {
400     Instruction* last_insn = block->instructions.back();
401     if (BPF_CLASS(last_insn->code) == BPF_JMP) {
// Jumps have a "true" successor and, if conditional, a "false" one.
402       ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
405       if (BPF_OP(last_insn->code) != BPF_JA) {
406         ComputeIncomingBranches(
407             targets_to_blocks.find(last_insn->jf_ptr)->second,
// Non-jump, non-return blocks fall through to the block at "next".
411     } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
412       ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
// Topologically sorts all basic blocks into "basic_blocks", starting from
// "first_block". Decrements incoming-edge counts as blocks are emitted;
// blocks reaching zero become eligible "heads". Prefers to keep a
// fall-through successor immediately after its predecessor, and patches in
// an explicit BPF_JA when that adjacency cannot be achieved.
419 void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
420                                   const TargetsToBlocks& blocks,
421                                   BasicBlocks* basic_blocks) {
422   // Textbook implementation of a toposort. We keep looking for basic blocks
423   // that don't have any incoming branches (initially, this is just the
424   // "first_block") and add them to the topologically sorted list of
425   // "basic_blocks". As we do so, we remove outgoing branches. This potentially
426   // ends up making our descendants eligible for the sorted list. The
427   // sorting algorithm terminates when there are no more basic blocks that have
428   // no incoming branches. If we didn't move all blocks from the set of
429   // "unordered_blocks" to the sorted list of "basic_blocks", there must have
430   // been a cyclic dependency. This should never happen in a BPF program, as
431   // well-formed BPF programs only ever have forward branches.
432   IncomingBranches unordered_blocks;
433   ComputeIncomingBranches(first_block, blocks, &unordered_blocks);
435   std::set<BasicBlock*> heads;
// NOTE(review): the enclosing loop header (original line 436) is elided in
// this listing; the statements below run once per emitted block.
437     // Move block from "unordered_blocks" to "basic_blocks".
438     basic_blocks->push_back(first_block);
440     // Inspect last instruction in the basic block. This is typically either a
441     // jump or a return statement. But it could also be a "normal" instruction
442     // that is followed by a jump target.
443     Instruction* last_insn = first_block->instructions.back();
444     if (BPF_CLASS(last_insn->code) == BPF_JMP) {
445       // Remove outgoing branches. This might end up moving our descendants
446       // into set of "head" nodes that no longer have any incoming branches.
447       TargetsToBlocks::const_iterator iter;
448       if (BPF_OP(last_insn->code) != BPF_JA) {
449         iter = blocks.find(last_insn->jf_ptr);
450         if (!--unordered_blocks[iter->second]) {
451           heads.insert(iter->second);
// The "true" target is preferred as the next block to emit.
454       iter = blocks.find(last_insn->jt_ptr);
455       if (!--unordered_blocks[iter->second]) {
456         first_block = iter->second;
459     } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
460       // We encountered an instruction that doesn't change code flow. Try to
461       // pick the next "first_block" from "last_insn->next", if possible.
462       TargetsToBlocks::const_iterator iter;
463       iter = blocks.find(last_insn->next);
464       if (!--unordered_blocks[iter->second]) {
465         first_block = iter->second;
468         // Our basic block is supposed to be followed by "last_insn->next",
469         // but dependencies prevent this from happening. Insert a BPF_JA
470         // instruction to correct the code flow.
471         Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
472         first_block->instructions.push_back(ja);
473         last_insn->next = ja;
// Termination check: if counts and emitted blocks disagree, the graph
// had a cycle, which is impossible for well-formed BPF.
477       if (unordered_blocks.size() != basic_blocks->size()) {
478         SANDBOX_DIE("Internal compiler error; cyclic graph detected");
482     // Proceed by picking an arbitrary node from the set of basic blocks that
483     // do not have any incoming branches.
484     first_block = *heads.begin();
485     heads.erase(heads.begin());
// Converts pointer-based jump targets (jt_ptr/jf_ptr) into the numeric
// relative offsets the kernel expects. Iterates the toposorted blocks in
// reverse so every target's offset is already known. Conditional jumps are
// limited to 8-bit offsets, so out-of-range targets get an auxiliary BPF_JA
// inserted. After this pass, jt_ptr/jf_ptr must not be used again.
489 void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
490                                    const TargetsToBlocks& targets_to_blocks) {
491   // While we previously used pointers in jt_ptr and jf_ptr to link jump
492   // instructions to their targets, we now convert these jumps to relative
493   // jumps that are suitable for loading the BPF program into the kernel.
496   // Since we just completed a toposort, all jump targets are guaranteed to
497   // go forward. This means, iterating over the basic blocks in reverse makes
498   // it trivial to compute the correct offsets.
499   BasicBlock* bb = NULL;
500   BasicBlock* last_bb = NULL;
501   for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
502        iter != basic_blocks->rend();
// NOTE(review): the loop increment and the "offset"/bb bookkeeping lines
// (original lines ~503-505) are elided in this listing.
506     Instruction* insn = bb->instructions.back();
507     if (BPF_CLASS(insn->code) == BPF_JMP) {
508       // Basic block ended in a jump instruction. We can now compute the
509       // appropriate offsets.
510       if (BPF_OP(insn->code) == BPF_JA) {
511         // "Always" jumps use the 32bit "k" field for the offset, instead
512         // of the 8bit "jt" and "jf" fields.
513         int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
515         insn->jt = insn->jf = 0;
517         // The offset computations for conditional jumps are just the same
518         // as for "always" jumps.
519         int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
520         int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;
522         // There is an added complication, because conditional relative jumps
523         // can only jump at most 255 instructions forward. If we have to jump
524         // further, insert an extra "always" jump.
525         Instructions::size_type jmp = bb->instructions.size();
526         if (jt > 255 || (jt == 255 && jf > 255)) {
527           Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
528           bb->instructions.push_back(ja);
532           // The newly inserted "always" jump, of course, requires us to adjust
533           // the jump targets in the original conditional jump.
538           Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
539           bb->instructions.insert(bb->instructions.begin() + jmp, ja);
543           // Again, we have to adjust the jump targets in the original
549       // Now we can finally set the relative jump targets in the conditional
550       // jump instruction. Afterwards, we must no longer access the jt_ptr
551       // and jf_ptr fields.
// Fall-through blocks must be immediately followed by their successor;
// anything else indicates a bug in the toposort.
555     } else if (BPF_CLASS(insn->code) != BPF_RET &&
556                targets_to_blocks.find(insn->next)->second != last_bb) {
557       SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
560     // Proceed to next basic block.
561     offset += bb->instructions.size();
// Final emission step: walks the sorted basic blocks in order and appends
// each instruction as a kernel-loadable "struct sock_filter" entry
// {code, jt, jf, k} to the output program.
567 void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
569   // Our basic blocks have been sorted and relative jump offsets have been
570   // computed. The last remaining step is for all the instructions in our
571   // basic blocks to be concatenated into a BPF program.
573   for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
574        bb_iter != basic_blocks.end();
576     const BasicBlock& bb = **bb_iter;
577     for (Instructions::const_iterator insn_iter = bb.instructions.begin();
578          insn_iter != bb.instructions.end();
580       const Instruction& insn = **insn_iter;
// NOTE(review): the push_back call wrapping this initializer (original
// line 581) is elided in this listing.
582           (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
// Entry point of the code generator. Runs the full pipeline:
//   1. FindBranchTargets      — count incoming edges per instruction.
//   2. CutGraphIntoBasicBlocks — partition the instruction graph.
//   3. MergeTails             — deduplicate identical block tails.
//   4. TopoSortBasicBlocks    — order blocks (forward branches only).
//   5. ComputeRelativeJumps   — turn target pointers into offsets.
//   6. ConcatenateBasicBlocks — emit sock_filter entries into "program".
// May only be called once per CodeGen instance (guarded by "compiled_";
// the check itself is partially elided in this listing).
588 void CodeGen::Compile(Instruction* instructions, Program* program) {
591                 "Cannot call Compile() multiple times. Create a new code "
592                 "generator instead");
596   BranchTargets branch_targets;
597   FindBranchTargets(*instructions, &branch_targets);
598   TargetsToBlocks all_blocks;
599   BasicBlock* first_block =
600       CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
601   MergeTails(&all_blocks);
602   BasicBlocks basic_blocks;
603   TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
604   ComputeRelativeJumps(&basic_blocks, all_blocks);
605   ConcatenateBasicBlocks(basic_blocks, program);
609 } // namespace sandbox