1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "sandbox/linux/seccomp-bpf/codegen.h"
7 #include <linux/filter.h>
13 #include "sandbox/linux/seccomp-bpf/basicblock.h"
14 #include "sandbox/linux/seccomp-bpf/errorcode.h"
15 #include "sandbox/linux/seccomp-bpf/instruction.h"
16 #include "testing/gtest/include/gtest/gtest.h"
20 // We want to access some of the private methods in the code generator. We
21 // do so by defining a "friend" that makes these methods public for us.
22 class CodeGenUnittestHelper : public CodeGen {
24 using CodeGen::CutGraphIntoBasicBlocks;
25 using CodeGen::FindBranchTargets;
26 using CodeGen::MergeTails;
33 HAS_MERGEABLE_TAILS = 0x0001,
36 using ProgramTestFunc = void (*)(CodeGenUnittestHelper* gen,
40 class ProgramTest : public ::testing::TestWithParam<ProgramTestFunc> {
42 ProgramTest() : gen_() {}
44 // RunTest runs the test function argument. It should be called at
45 // the end of each program test case.
46 void RunTest(Instruction* head, int flags) { GetParam()(&gen_, head, flags); }
48 Instruction* MakeInstruction(uint16_t code,
50 Instruction* next = nullptr) {
51 Instruction* ret = gen_.MakeInstruction(code, k, next);
52 EXPECT_NE(nullptr, ret);
53 EXPECT_EQ(code, ret->code);
55 if (code == BPF_JMP + BPF_JA) {
56 // Annoying inconsistency.
57 EXPECT_EQ(nullptr, ret->next);
58 EXPECT_EQ(next, ret->jt_ptr);
60 EXPECT_EQ(next, ret->next);
61 EXPECT_EQ(nullptr, ret->jt_ptr);
63 EXPECT_EQ(nullptr, ret->jf_ptr);
67 Instruction* MakeInstruction(uint16_t code,
71 Instruction* ret = gen_.MakeInstruction(code, k, jt, jf);
72 EXPECT_NE(nullptr, ret);
73 EXPECT_EQ(code, ret->code);
75 EXPECT_EQ(nullptr, ret->next);
76 EXPECT_EQ(jt, ret->jt_ptr);
77 EXPECT_EQ(jf, ret->jf_ptr);
82 CodeGenUnittestHelper gen_;
85 TEST_P(ProgramTest, OneInstruction) {
86 // Create the most basic valid BPF program:
88 Instruction* head = MakeInstruction(BPF_RET + BPF_K, 0);
89 RunTest(head, NO_FLAGS);
92 TEST_P(ProgramTest, SimpleBranch) {
93 // Create a program with a single branch:
94 // JUMP if eq 42 then $0 else $1
97 Instruction* head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K,
99 MakeInstruction(BPF_RET + BPF_K, 1),
100 MakeInstruction(BPF_RET + BPF_K, 0));
101 RunTest(head, NO_FLAGS);
104 TEST_P(ProgramTest, AtypicalBranch) {
105 // Create a program with a single branch:
106 // JUMP if eq 42 then $0 else $0
109 Instruction* ret = MakeInstruction(BPF_RET + BPF_K, 0);
110 Instruction* head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret);
112 // N.B.: As the instructions in both sides of the branch are already
113 // the same object, we do not actually have any "mergeable" branches.
114 // This needs to be reflected in our choice of "flags".
115 RunTest(head, NO_FLAGS);
118 TEST_P(ProgramTest, Complex) {
119 // Creates a basic BPF program that we'll use to test some of the code:
120 // JUMP if eq 42 the $0 else $1 (insn6)
122 // 1: JUMP if eq 42 then $2 else $4 (insn4)
123 // 2: JUMP to $3 (insn2)
128 Instruction* insn0 = MakeInstruction(BPF_RET + BPF_K, 42);
129 Instruction* insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0);
130 Instruction* insn2 = MakeInstruction(BPF_JMP + BPF_JA, 0, insn1);
132 // We explicitly duplicate instructions so that MergeTails() can coalesce
134 Instruction* insn3 = MakeInstruction(
135 BPF_LD + BPF_W + BPF_ABS, 42, MakeInstruction(BPF_RET + BPF_K, 42));
138 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3);
139 Instruction* insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4);
141 // Force a basic block that ends in neither a jump instruction nor a return
142 // instruction. It only contains "insn5". This exercises one of the less
143 // common code paths in the topo-sort algorithm.
144 // This also gives us a diamond-shaped pattern in our graph, which stresses
145 // another aspect of the topo-sort algorithm (namely, the ability to
146 // correctly count the incoming branches for subtrees that are not disjunct).
148 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4);
150 RunTest(insn6, HAS_MERGEABLE_TAILS);
153 TEST_P(ProgramTest, ConfusingTails) {
154 // This simple program demonstrates https://crbug.com/351103/
155 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could
156 // be tempted to merge them since they are the same. However, they are
157 // not mergeable because they fall-through to non semantically equivalent
159 // Without the fix for this bug, this program should trigger the check in
160 // CompileAndCompare: the serialized graphs from the program and its compiled
161 // version will differ.
164 // 1) if A == 0x1; then JMP 2 else JMP 3
165 // 2) LOAD 0 // System call number
166 // 3) if A == 0x2; then JMP 4 else JMP 5
167 // 4) LOAD 0 // System call number
168 // 5) if A == 0x1; then JMP 6 else JMP 7
172 Instruction* i7 = MakeInstruction(BPF_RET + BPF_K, 1);
173 Instruction* i6 = MakeInstruction(BPF_RET + BPF_K, 0);
174 Instruction* i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
175 Instruction* i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
176 Instruction* i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
177 Instruction* i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
178 Instruction* i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
179 Instruction* i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
181 RunTest(i0, NO_FLAGS);
184 TEST_P(ProgramTest, ConfusingTailsBasic) {
185 // Without the fix for https://crbug.com/351103/, (see
186 // SampleProgramConfusingTails()), this would generate a cyclic graph and
187 // crash as the two "LOAD 0" instructions would get merged.
190 // 1) if A == 0x1; then JMP 2 else JMP 3
191 // 2) LOAD 0 // System call number
192 // 3) if A == 0x2; then JMP 4 else JMP 5
193 // 4) LOAD 0 // System call number
196 Instruction* i5 = MakeInstruction(BPF_RET + BPF_K, 1);
197 Instruction* i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
198 Instruction* i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
199 Instruction* i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
200 Instruction* i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
201 Instruction* i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
203 RunTest(i0, NO_FLAGS);
206 TEST_P(ProgramTest, ConfusingTailsMergeable) {
207 // This is similar to SampleProgramConfusingTails(), except that
208 // instructions 2 and 4 are now RET instructions.
209 // In PointerCompare(), this exercises the path where two blocks are of the
210 // same length and identical and the last instruction is a JMP or RET, so the
211 // following blocks don't need to be looked at and the blocks are mergeable.
214 // 1) if A == 0x1; then JMP 2 else JMP 3
216 // 3) if A == 0x2; then JMP 4 else JMP 5
218 // 5) if A == 0x1; then JMP 6 else JMP 7
222 Instruction* i7 = MakeInstruction(BPF_RET + BPF_K, 1);
223 Instruction* i6 = MakeInstruction(BPF_RET + BPF_K, 0);
224 Instruction* i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
225 Instruction* i4 = MakeInstruction(BPF_RET + BPF_K, 42);
226 Instruction* i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
227 Instruction* i2 = MakeInstruction(BPF_RET + BPF_K, 42);
228 Instruction* i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
229 Instruction* i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
231 RunTest(i0, HAS_MERGEABLE_TAILS);
234 void MakeInstruction(CodeGenUnittestHelper* codegen,
235 Instruction* program, int) {
236 // Nothing to do here
239 void FindBranchTargets(CodeGenUnittestHelper* codegen, Instruction* prg, int) {
240 BranchTargets branch_targets;
241 codegen->FindBranchTargets(*prg, &branch_targets);
243 // Verifying the general properties that should be true for every
244 // well-formed BPF program.
245 // Perform a depth-first traversal of the BPF program an verify that all
246 // targets of BPF_JMP instructions are represented in the "branch_targets".
247 // At the same time, compute a set of both the branch targets and all the
248 // instructions in the program.
249 std::vector<Instruction*> stack;
250 std::set<Instruction*> all_instructions;
251 std::set<Instruction*> target_instructions;
252 BranchTargets::const_iterator end = branch_targets.end();
253 for (Instruction* insn = prg;;) {
254 all_instructions.insert(insn);
255 if (BPF_CLASS(insn->code) == BPF_JMP) {
256 target_instructions.insert(insn->jt_ptr);
257 ASSERT_TRUE(insn->jt_ptr != NULL);
258 ASSERT_TRUE(branch_targets.find(insn->jt_ptr) != end);
259 if (BPF_OP(insn->code) != BPF_JA) {
260 target_instructions.insert(insn->jf_ptr);
261 ASSERT_TRUE(insn->jf_ptr != NULL);
262 ASSERT_TRUE(branch_targets.find(insn->jf_ptr) != end);
263 stack.push_back(insn->jf_ptr);
266 } else if (BPF_CLASS(insn->code) == BPF_RET) {
267 ASSERT_TRUE(insn->next == NULL);
274 ASSERT_TRUE(insn->next != NULL);
278 ASSERT_TRUE(target_instructions.size() == branch_targets.size());
280 // We can now subtract the set of the branch targets from the set of all
281 // instructions. This gives us a set with the instructions that nobody
282 // ever jumps to. Verify that they are no included in the
283 // "branch_targets" that FindBranchTargets() computed for us.
284 Instructions non_target_instructions(all_instructions.size() -
285 target_instructions.size());
286 set_difference(all_instructions.begin(),
287 all_instructions.end(),
288 target_instructions.begin(),
289 target_instructions.end(),
290 non_target_instructions.begin());
291 for (Instructions::const_iterator iter = non_target_instructions.begin();
292 iter != non_target_instructions.end();
294 ASSERT_TRUE(branch_targets.find(*iter) == end);
298 void CutGraphIntoBasicBlocks(CodeGenUnittestHelper* codegen,
301 BranchTargets branch_targets;
302 codegen->FindBranchTargets(*prg, &branch_targets);
303 TargetsToBlocks all_blocks;
304 BasicBlock* first_block =
305 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
306 ASSERT_TRUE(first_block != NULL);
307 ASSERT_TRUE(first_block->instructions.size() > 0);
308 Instruction* first_insn = first_block->instructions[0];
310 // Basic blocks are supposed to start with a branch target and end with
311 // either a jump or a return instruction. It can also end, if the next
312 // instruction forms the beginning of a new basic block. There should be
313 // no other jumps or return instructions in the middle of a basic block.
314 for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin();
315 bb_iter != all_blocks.end();
317 BasicBlock* bb = bb_iter->second;
318 ASSERT_TRUE(bb != NULL);
319 ASSERT_TRUE(bb->instructions.size() > 0);
320 Instruction* insn = bb->instructions[0];
321 ASSERT_TRUE(insn == first_insn ||
322 branch_targets.find(insn) != branch_targets.end());
323 for (Instructions::const_iterator insn_iter = bb->instructions.begin();;) {
325 if (++insn_iter != bb->instructions.end()) {
326 ASSERT_TRUE(BPF_CLASS(insn->code) != BPF_JMP);
327 ASSERT_TRUE(BPF_CLASS(insn->code) != BPF_RET);
329 ASSERT_TRUE(BPF_CLASS(insn->code) == BPF_JMP ||
330 BPF_CLASS(insn->code) == BPF_RET ||
331 branch_targets.find(insn->next) != branch_targets.end());
334 ASSERT_TRUE(branch_targets.find(*insn_iter) == branch_targets.end());
339 void MergeTails(CodeGenUnittestHelper* codegen, Instruction* prg, int flags) {
340 BranchTargets branch_targets;
341 codegen->FindBranchTargets(*prg, &branch_targets);
342 TargetsToBlocks all_blocks;
343 BasicBlock* first_block =
344 codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
346 // The shape of our graph and thus the function of our program should
347 // still be unchanged after we run MergeTails(). We verify this by
348 // serializing the graph and verifying that it is still the same.
349 // We also verify that at least some of the edges changed because of
351 std::string graph[2];
352 std::string edges[2];
354 // The loop executes twice. After the first run, we call MergeTails() on
357 // Traverse the entire program in depth-first order.
358 std::vector<BasicBlock*> stack;
359 for (BasicBlock* bb = first_block;;) {
360 // Serialize the instructions in this basic block. In general, we only
361 // need to serialize "code" and "k"; except for a BPF_JA instruction
362 // where "k" isn't set.
363 // The stream of instructions should be unchanged after MergeTails().
364 for (Instructions::const_iterator iter = bb->instructions.begin();
365 iter != bb->instructions.end();
367 graph[i].append(reinterpret_cast<char*>(&(*iter)->code),
368 sizeof((*iter)->code));
369 if (BPF_CLASS((*iter)->code) != BPF_JMP ||
370 BPF_OP((*iter)->code) != BPF_JA) {
371 graph[i].append(reinterpret_cast<char*>(&(*iter)->k),
376 // Also serialize the addresses the basic blocks as we encounter them.
377 // This will change as basic blocks are coalesed by MergeTails().
378 edges[i].append(reinterpret_cast<char*>(&bb), sizeof(bb));
380 // Depth-first traversal of the graph. We only ever need to look at the
381 // very last instruction in the basic block, as that is the only one that
382 // can change code flow.
383 Instruction* insn = bb->instructions.back();
384 if (BPF_CLASS(insn->code) == BPF_JMP) {
385 // For jump instructions, we need to remember the "false" branch while
386 // traversing the "true" branch. This is not necessary for BPF_JA which
387 // only has a single branch.
388 if (BPF_OP(insn->code) != BPF_JA) {
389 stack.push_back(all_blocks[insn->jf_ptr]);
391 bb = all_blocks[insn->jt_ptr];
392 } else if (BPF_CLASS(insn->code) == BPF_RET) {
393 // After a BPF_RET, see if we need to back track.
400 // For "normal" instructions, just follow to the next basic block.
401 bb = all_blocks[insn->next];
405 // Our loop runs exactly two times.
409 codegen->MergeTails(&all_blocks);
411 ASSERT_TRUE(graph[0] == graph[1]);
412 if (flags & HAS_MERGEABLE_TAILS) {
413 ASSERT_TRUE(edges[0] != edges[1]);
415 ASSERT_TRUE(edges[0] == edges[1]);
419 void CompileAndCompare(CodeGenUnittestHelper* codegen, Instruction* prg, int) {
420 // TopoSortBasicBlocks() has internal checks that cause it to fail, if it
421 // detects a problem. Typically, if anything goes wrong, this looks to the
422 // TopoSort algorithm as if there had been cycles in the input data.
423 // This provides a pretty good unittest.
424 // We hand-crafted the program returned by SampleProgram() to exercise
425 // several of the more interesting code-paths. See comments in
426 // SampleProgram() for details.
427 // In addition to relying on the internal consistency checks in the compiler,
428 // we also serialize the graph and the resulting BPF program and compare
429 // them. With the exception of BPF_JA instructions that might have been
430 // inserted, both instruction streams should be equivalent.
431 // As Compile() modifies the instructions, we have to serialize the graph
432 // before calling Compile().
434 Instructions source_stack;
435 for (const Instruction* insn = prg, *next; insn; insn = next) {
436 if (BPF_CLASS(insn->code) == BPF_JMP) {
437 if (BPF_OP(insn->code) == BPF_JA) {
438 // Do not serialize BPF_JA instructions (see above).
442 source_stack.push_back(insn->jf_ptr);
445 } else if (BPF_CLASS(insn->code) == BPF_RET) {
446 if (source_stack.empty()) {
449 next = source_stack.back();
450 source_stack.pop_back();
455 // Only serialize "code" and "k". That's all the information we need to
456 // compare. The rest of the information is encoded in the order of
458 source.append(reinterpret_cast<const char*>(&insn->code),
460 source.append(reinterpret_cast<const char*>(&insn->k), sizeof(insn->k));
463 // Compile the program
464 CodeGen::Program bpf;
465 codegen->Compile(prg, &bpf);
467 // Serialize the resulting BPF instructions.
468 std::string assembly;
469 std::vector<int> assembly_stack;
470 for (int idx = 0; idx >= 0;) {
471 ASSERT_TRUE(idx < (int)bpf.size());
472 struct sock_filter& insn = bpf[idx];
473 if (BPF_CLASS(insn.code) == BPF_JMP) {
474 if (BPF_OP(insn.code) == BPF_JA) {
475 // Do not serialize BPF_JA instructions (see above).
479 assembly_stack.push_back(idx + insn.jf + 1);
482 } else if (BPF_CLASS(insn.code) == BPF_RET) {
483 if (assembly_stack.empty()) {
486 idx = assembly_stack.back();
487 assembly_stack.pop_back();
492 // Serialize the same information that we serialized before compilation.
493 assembly.append(reinterpret_cast<char*>(&insn.code), sizeof(insn.code));
494 assembly.append(reinterpret_cast<char*>(&insn.k), sizeof(insn.k));
496 ASSERT_TRUE(source == assembly);
499 const ProgramTestFunc kProgramTestFuncs[] = {
502 CutGraphIntoBasicBlocks,
507 INSTANTIATE_TEST_CASE_P(CodeGen,
509 ::testing::ValuesIn(kProgramTestFuncs));
513 } // namespace sandbox