2 # Copyright (c) 2013 The Native Client Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 # Executable specification of valid instructions and superinstructions (in terms
7 # of their disassembler listing).
8 # Should serve as formal and up-to-date ABI reference and as baseline for
9 # validator exhaustive tests.
11 # It is generally organized as a set of functions responsible for recognizing
12 # and validating specific patterns (jump instructions, regular instructions,
13 # superinstructions, etc.)
14 # There are three outcomes for running such a function:
15 # - function raises DoNotMatchError (which means instruction is of completely
16 # different structure, for example when we call ValidateSuperinstruction on
18 # - function raises SandboxingError (which means instruction generally matches
19 # respective pattern, but some rules are violated)
20 # - function returns (which means instruction(s) is(are) safe)
22 # Why exceptions instead of returning False or something? Because they carry
23 # stack traces, which makes it easier to investigate why particular instruction
25 # Why distinguish DoNotMatchError and SandboxingError? Because on the topmost
26 # level we attempt to call all matchers and we need to see which error message
32 class DoNotMatchError(Exception):
36 class SandboxingError(Exception):
43 def _ValidateLongNop(instruction):
44 # Short nops do not require special exceptions (such as allowing repeated
45 # prefixes and segment access), so they are handled as regular instructions.
46 if re.match(r'nopw 0x0\(%[er]ax,%[er]ax,1\)$',
50 r'(data32 )*nopw %cs:0x0\(%[er]ax,%[er]ax,1\)$',
53 raise DoNotMatchError(instruction)
56 def _ValidateStringInstruction(instruction):
57 prefix_re = r'(rep |repz |repnz )?'
58 lods_re = r'lods %ds:\(%esi\),(%al|%ax|%eax)'
59 stos_re = r'stos (%al|%ax|%eax),%es:\(%edi\)'
60 scas_re = r'scas %es:\(%edi\),(%al|%ax|%eax)'
61 movs_re = r'movs[bwl] %ds:\(%esi\),%es:\(%edi\)'
62 cmps_re = r'cmps[bwl] %es:\(%edi\),%ds:\(%esi\)'
64 string_insn_re = '%s(%s)$' % (
66 '|'.join([lods_re, stos_re, scas_re, movs_re, cmps_re]))
68 if re.match(string_insn_re, instruction.disasm):
71 raise DoNotMatchError(instruction)
74 def _ValidateTlsInstruction(instruction):
75 if re.match(r'mov %gs:(0x0|0x4),%e[a-z][a-z]$', instruction.disasm):
78 raise DoNotMatchError(instruction)
81 # What can follow 'j' in conditional jumps 'je', 'jno', etc.
82 _CONDITION_SUFFIX_RE = r'(a(e?)|b(e?)|g(e?)|l(e?)|(n?)e|(n?)o|(n?)p|(n?)s)'
def _AnyRegisterRE(group_name='register'):
  """Return regex source that matches a single register operand.

  The fragment matches '%' followed either by an x87 stack slot written as
  'st(<digits>)' or by a run of word characters. The whole register,
  including the leading '%', is captured in a named group.

  Args:
    group_name: name given to the capturing group.

  Returns:
    Regex source string (not compiled).
  """
  # TODO(shcherbina): explicitly list all kinds of registers we care to
  # distinguish for validation purposes.
  x87_stack_slot = r'st\(\d+\)'
  plain_register = r'\w+'
  return '(?P<%s>%%(%s|%s))' % (group_name, x87_stack_slot, plain_register)
def _HexRE(group_name='value'):
  """Return regex source that matches a lowercase hexadecimal literal.

  The literal is an optional '-' sign, the '0x' prefix and one or more
  characters from [0-9a-f]; it is captured in a named group.

  Args:
    group_name: name given to the capturing group.

  Returns:
    Regex source string (not compiled).
  """
  hex_literal = r'-?0x[\da-f]+'
  return '(?P<%s>%s)' % (group_name, hex_literal)
95 def _ImmediateRE(group_name='immediate'):
96 return r'(?P<%s>\$%s)' % (
98 _HexRE(group_name=group_name + '_value'))
101 def MemoryRE(group_name='memory'):
109 return r'(?P<%s>(?P<%s_segment>%%[cdefgs]s:)?%s?(\(%s?(,%s,\d)?\))?)' % (
112 _HexRE(group_name=group_name + '_offset'),
113 _AnyRegisterRE(group_name=group_name + '_base'),
114 _AnyRegisterRE(group_name=group_name + '_index'))
117 def _IndirectJumpTargetRE(group_name='target'):
118 return r'(?P<%s>\*(%s|%s))' % (
120 _AnyRegisterRE(group_name=group_name + '_register'),
121 MemoryRE(group_name=group_name + '_memory'))
124 def _OperandRE(group_name='operand'):
125 return r'(?P<%s>%s|%s|%s|%s)' % (
127 _AnyRegisterRE(group_name=group_name + '_register'),
128 _ImmediateRE(group_name=group_name + '_immediate'),
129 MemoryRE(group_name=group_name + '_memory'),
130 _IndirectJumpTargetRE(group_name=group_name + '_target'))
133 def _SplitOps(insn, args):
134 # We can't use just args.split(',') because operands can contain commas
135 # themselves, for example '(%r15,%rax,1)'.
139 # We do not use mere re.match(_OperandRE(), args, i) here because
140 # python backtracking regexes do not guarantee to find longest match.
141 m = re.compile(r'(%s)($|,)' % _OperandRE()).match(args, i)
142 assert m is not None, (args, i)
143 ops.append(m.group(1))
147 assert args[i] == ',', (insn, args, i)
152 def ParseInstruction(instruction):
153 """Parse an instruction into operands.
156 instruction: objdump_parser.Instruction tuple
158 prefixes, mnemonic, operands
160 SandboxingError on erroneous bytes.
163 disasm, _, _ = instruction.disasm.partition('#')
164 elems = disasm.split()
167 raise SandboxingError(
168 'disasm is empty', instruction)
171 while elems != [] and elems[0] in [
172 'lock', 'rep', 'repz', 'repnz',
173 'data16', 'data32', 'addr16', 'addr32', 'addr64']:
174 prefixes.append(elems.pop(0))
177 raise SandboxingError(
178 'dangling legacy prefixes', instruction)
182 if re.match(r'rex([.]W?R?X?B?)?$', name):
183 raise SandboxingError('dangling rex prefix', instruction)
185 # There could be branching expectation information in instruction names:
188 name_re = r'[a-z]\w*(,p[nt])?$'
189 assert re.match(name_re, name) or name == "nop/reserved", name
193 elif len(elems) == 2:
194 ops = _SplitOps(instruction, elems[1])
196 assert False, instruction
198 return prefixes, name, ops
# Names of the 32-bit registers and of their 64-bit counterparts.
# Both are materialized as real lists: this file concatenates them
# (REGS32 + REGS64), which dict views do not support on Python 3, while on
# Python 2 keys()/values() already return lists, so nothing changes there.
REGS32 = list(REG32_TO_REG64.keys())
REGS64 = list(REG32_TO_REG64.values())
223 class Condition(object):
224 """Represents assertion about the state of 64-bit registers.
226 (used as precondition and postcondition)
228 Supported assertions:
229 0. %rbp and %rsp are sandboxed (and nothing is known about other registers)
230 1. {%rax} is restricted, %rbp and %rsp are sandboxed
231 2-13. same for %rbx-%r14 not including %rbp and %rsp
232 14. %rbp is restricted, %rsp is sandboxed
233 15. %rsp is restricted, %rbp is sandboxed
235 It can be observed that all assertions 1..15 differ from default 0 in a single
236 register, which prompts internal representation of a single field,
237 _restricted_register, which stores name of this standing out register
240 * 'restricted' means higher 32 bits are zeroes
241 * 'sandboxed' means within [%r15, %r15 + 2**32) range
242 It goes without saying that %r15 is never changed and by definition sandboxed.
245 def __init__(self, restricted=None, restricted_instead_of_sandboxed=None):
246 self._restricted_register = None
247 if restricted is not None:
248 assert restricted_instead_of_sandboxed is None
249 assert restricted in REGS64
250 assert restricted not in ['%r15', '%rbp', '%rsp']
251 self._restricted_register = restricted
252 if restricted_instead_of_sandboxed is not None:
253 assert restricted is None
254 assert restricted_instead_of_sandboxed in ['%rbp', '%rsp']
255 self._restricted_register = restricted_instead_of_sandboxed
257 def GetAlteredRegisters(self):
258 """ Return pair (restricted, restricted_instead_of_sandboxed).
260 Each item is either register name or None.
262 if self._restricted_register is None:
264 elif self._restricted_register in ['%rsp', '%rbp']:
265 return None, self._restricted_register
267 return self._restricted_register, None
269 def __eq__(self, other):
270 return self._restricted_register == other._restricted_register
272 def __ne__(self, other):
273 return not self == other
275 def Implies(self, other):
276 return self.WhyNotImplies(other) is None
278 def WhyNotImplies(self, other):
279 if other._restricted_register is None:
280 if self._restricted_register in ['%rbp', '%rsp']:
281 return '%s should not be restricted' % self._restricted_register
285 if self._restricted_register != other._restricted_register:
287 'register %s should be restricted, '
288 'while in fact %r is restricted' % (
289 other._restricted_register, self._restricted_register))
294 if self._restricted_register is None:
295 return 'Condition(default)'
296 elif self._restricted_register in ['%rbp', '%rsp']:
297 return ('Condition(%s restricted instead of sandboxed)'
298 % self._restricted_register)
300 return 'Condition(%s restricted)' % self._restricted_register
306 if reg not in ['%r15', '%rbp', '%rsp']:
307 yield Condition(restricted=reg)
308 yield Condition(restricted_instead_of_sandboxed='%rbp')
309 yield Condition(restricted_instead_of_sandboxed='%rsp')
312 def _ValidateSpecialStackInstruction(instruction):
313 # Validate 64-bit instruction that is in special relationship with rsp/rbp.
315 if instruction.disasm in ['mov %rsp,%rbp', 'mov %rbp,%rsp']:
316 return Condition(), Condition()
319 'and %s,%%rsp$' % _ImmediateRE(),
322 # We only allow 1-byte immediate, so we have to look at machine code.
323 if (len(instruction.bytes) == 4 and
324 0x48 <= instruction.bytes[0] <= 0x4f and
325 instruction.bytes[1:3] == [0x83, 0xe4]):
326 # We extract mask from bytes, not from textual representation, because
327 # objdump and RDFA decoder print it differently
328 # (-1 is displayed as '0xffffffffffffffff' by objdump and as '0xff' by
330 # See https://code.google.com/p/nativeclient/issues/detail?id=3164
331 mask = instruction.bytes[3]
332 assert mask == int(m.group('immediate_value'), 16) & 0xff
334 raise SandboxingError(
335 'mask should be negative to ensure that higher '
336 'bits of %rsp do not change',
339 raise SandboxingError(
340 'unsupported form of "and <mask>,%rsp" instruction', instruction)
341 return Condition(), Condition()
343 if (instruction.disasm in ['add %r15,%rbp', 'add %r15,%rbp'] or
344 re.match(r'lea (0x0+)?\(%rbp,%r15,1\),%rbp$', instruction.disasm)):
345 return Condition(restricted_instead_of_sandboxed='%rbp'), Condition()
347 if (instruction.disasm in ['add %r15,%rsp', 'add %r15,%rsp'] or
348 re.match(r'lea (0x0+)?\(%rsp,%r15,1\),%rsp$', instruction.disasm)):
349 return Condition(restricted_instead_of_sandboxed='%rsp'), Condition()
351 # TODO(shcherbina): disallow this instruction once
352 # http://code.google.com/p/nativeclient/issues/detail?id=3070
354 if instruction.disasm == 'or %r15,%rsp':
355 return Condition(restricted_instead_of_sandboxed='%rsp'), Condition()
357 raise DoNotMatchError(instruction)
360 def _GetLegacyPrefixes(instruction):
362 for b in instruction.bytes:
364 0x66, 0x67, 0x2e, 0x3e, 0x26, 0x64, 0x65, 0x36, 0xf0, 0xf3, 0xf2]:
367 raise SandboxingError('addr prefix is not allowed', instruction)
369 raise SandboxingError('duplicate legacy prefix', instruction)
374 def _ProcessMemoryAccess(instruction, operands):
375 """Make sure that memory access is valid and return precondition required.
377 (only makes sense for 64-bit instructions)
380 instruction: Instruction tuple
381 operands: list of instruction operands as strings, for example
382 ['%eax', '(%r15,%rbx,1)']
384 Condition object representing precondition required for memory access (if
385 it's present among operands) to be valid.
387 SandboxingError if memory access is invalid.
389 precondition = Condition()
391 m = re.match(MemoryRE() + r'$', op)
393 assert m.group('memory_segment') is None
394 base = m.group('memory_base')
395 index = m.group('memory_index')
396 allowed_bases = ['%r15', '%rbp', '%rsp', '%rip']
397 if base not in allowed_bases:
398 raise SandboxingError(
399 'memory access only is allowed with base from %s'
402 if index is not None:
405 elif index in REGS64:
406 if index in ['%r15', '%rsp', '%rbp']:
407 raise SandboxingError(
408 '%s can\'t be used as index in memory access' % index,
411 assert precondition == Condition()
412 precondition = Condition(restricted=index)
414 raise SandboxingError(
415 'unrecognized register is used for memory access as index',
420 def _ProcessOperandWrites(instruction, write_operands, zero_extending=False):
421 """Check that writes to operands are valid, return postcondition established.
423 (only makes sense for 64-bit instructions)
426 instruction: Instruction tuple
427 write_operands: list of operands instruction writes to as strings,
428 for example ['%eax', '(%r15,%rbx,1)']
429 zero_extending: whether instruction is considered zero extending
431 Condition object representing postcondition established by operand writes.
433 SandboxingError if write is invalid.
435 postcondition = Condition()
436 for i, op in enumerate(write_operands):
437 if op in ['%r15', '%r15d', '%r15w', '%r15b']:
438 raise SandboxingError('changes to r15 are not allowed', instruction)
439 if op in ['%bpl', '%bp', '%rbp']:
440 raise SandboxingError('changes to rbp are not allowed', instruction)
441 if op in ['%spl', '%sp', '%rsp']:
442 raise SandboxingError('changes to rsp are not allowed', instruction)
445 # Only last of the operand writes is considered zero-extending.
448 # does not zero-extend %rax.
449 if zero_extending and i == len(write_operands) - 1:
450 r = REG32_TO_REG64[op]
451 if r in ['%rbp', '%rsp']:
452 postcondition = Condition(restricted_instead_of_sandboxed=r)
454 postcondition = Condition(restricted=r)
456 if op in ['%ebp', '%esp']:
457 raise SandboxingError(
458 'non-zero-extending changes to ebp and esp are not allowed',
def _InstructionNameIn(name, candidates):
  """Tell whether name is one of candidates, optionally size-suffixed.

  Args:
    name: instruction mnemonic, for example 'movq'.
    candidates: iterable of base mnemonics; they are inserted into a regular
        expression as is.

  Returns:
    True if name equals one of the candidates, possibly followed by a single
    operand size suffix from 'b', 'w', 'l', 'q'; False otherwise.
  """
  alternatives = '|'.join(candidates)
  pattern = '(%s)[bwlq]?$' % alternatives
  if re.match(pattern, name):
    return True
  return False
468 _X87_INSTRUCTIONS = set([
471 'fadd', 'fadds', 'faddl', 'faddp',
477 'fcmovb', 'fcmovbe', 'fcmove', 'fcmovnb',
478 'fcmovnbe', 'fcmovne', 'fcmovnu', 'fcmovu',
479 'fcom', 'fcoms', 'fcoml',
480 'fcomp', 'fcomps', 'fcompl',
486 'fdiv', 'fdivs', 'fdivl',
488 'fdivr', 'fdivrs', 'fdivrl',
496 'fild', 'fildl', 'fildll',
500 'fistp', 'fistpl', 'fistpll',
501 'fisttp', 'fisttpl', 'fisttpll',
502 'fld', 'flds', 'fldl', 'fldt',
512 'fmul', 'fmuls', 'fmull',
527 'fst', 'fsts', 'fstl',
528 'fstp', 'fstps', 'fstpl', 'fstpt',
532 'fsub', 'fsubs', 'fsubl',
534 'fsubr', 'fsubrs', 'fsubrl',
539 'fucom', 'fucomp', 'fucompp',
550 # Instructions from mmx_instructions.def (besides MMX, they include SSE2/3
551 # and other stuff that works with MMX registers).
552 _MMX_INSTRUCTIONS = set([
668 # Instructions from xmm_instructions.def (that is, instructions that work
669 # with XMM registers). These instruction names can be prepended with 'v', which
670 # results in their AVX counterpart.
671 _XMM_AVX_INSTRUCTIONS = set([
702 'cvtsi2sd', 'cvtsi2sdl', 'cvtsi2sdq',
703 'cvtsi2ss', 'cvtsi2ssl', 'cvtsi2ssq',
918 _XMM_AVX_INSTRUCTIONS.update(['v' + name for name in _XMM_AVX_INSTRUCTIONS])
920 _XMM_AVX_INSTRUCTIONS.update([
996 # Add instructions like VFMADDPD/VFMADD132PD/VFMADD213PD/VFMADD231PD.
1019 for operand_order_suffix in ['', '132', '213', '231']:
1020 _XMM_AVX_INSTRUCTIONS.add(fma_name % operand_order_suffix)
1022 for cmp_suffix in ['pd', 'ps', 'sd', 'ss']:
1023 for cmp_op in ['', 'eq', 'lt', 'le', 'unord', 'neq', 'nlt', 'nle', 'ord']:
1024 _XMM_AVX_INSTRUCTIONS.add('cmp%s%s' % (cmp_op, cmp_suffix))
1025 _XMM_AVX_INSTRUCTIONS.add('vcmp%s%s' % (cmp_op, cmp_suffix))
1027 'eq_uq', 'nge', 'ngt', 'false',
1028 'neq_oq', 'ge', 'gt', 'true',
1029 'eq_os', 'lt_oq', 'le_oq', 'unord_s',
1030 'neq_us', 'nlt_uq', 'nle_uq', 'ord_s',
1031 'eq_us', 'nge_uq', 'ngt_uq', 'false_os',
1032 'neq_os', 'ge_oq', 'gt_oq', 'true_us']:
1033 _XMM_AVX_INSTRUCTIONS.add('vcmp%s%s' % (cmp_op, cmp_suffix))
1036 def ValidateRegularInstruction(instruction, bitness):
1037 """Validate regular instruction (not direct jump).
1040 instruction: objdump_parser.Instruction tuple
1043 Pair (precondition, postcondition) of Condition instances.
1044 (for 32-bit case they are meaningless and are not used)
1046 According to usual convention.
1048 assert bitness in [32, 64]
1050 if instruction.disasm.startswith('.byte ') or '(bad)' in instruction.disasm:
1051 raise SandboxingError('objdump failed to decode', instruction)
1054 _ValidateLongNop(instruction)
1055 return Condition(), Condition()
1056 except DoNotMatchError:
1059 # Report error on duplicate prefixes (note that they are allowed in
1061 _GetLegacyPrefixes(instruction)
1065 _ValidateStringInstruction(instruction)
1066 return Condition(), Condition()
1067 except DoNotMatchError:
1071 _ValidateTlsInstruction(instruction)
1072 return Condition(), Condition()
1073 except DoNotMatchError:
1078 return _ValidateSpecialStackInstruction(instruction)
1079 except DoNotMatchError:
1082 prefixes, name, ops = ParseInstruction(instruction)
1084 for prefix in prefixes:
1085 if prefix != 'lock':
1086 raise SandboxingError('prefix %s is not allowed' % prefix, instruction)
1089 if op in ['%cs', '%ds', '%es', '%ss', '%fs', '%gs']:
1090 raise SandboxingError(
1091 'access to segment registers is not allowed', instruction)
1092 if op.startswith('%cr'):
1093 raise SandboxingError(
1094 'access to control registers is not allowed', instruction)
1095 if op.startswith('%db'):
1096 raise SandboxingError(
1097 'access to debug registers is not allowed', instruction)
1098 if op.startswith('%tr'):
1099 raise SandboxingError(
1100 'access to test registers is not allowed', instruction)
1102 m = re.match(MemoryRE() + r'$', op)
1103 if m is not None and m.group('memory_segment') is not None:
1104 raise SandboxingError(
1105 'segments in memory references are not allowed', instruction)
1109 if _InstructionNameIn(
1111 ['mov', # including MOVQ
1112 'add', 'sub', 'and', 'or', 'xor',
1114 'inc', 'dec', 'neg', 'not',
1115 'shl', 'shr', 'sar', 'rol', 'ror', 'rcl', 'rcr',
1120 'prefetch', 'prefetchnta', 'prefetcht0', 'prefetcht1', 'prefetcht2',
1122 'adc', 'sbb', 'bsf', 'bsr',
1123 'lzcnt', 'tzcnt', 'popcnt', 'crc32', 'cmpxchg',
1125 'movmskpd', 'movmskps', 'movnti',
1126 'btc', 'btr', 'bts', 'bt',
1128 'imul', 'mul', 'div', 'idiv', 'push',
1129 ]) or name in ['movd', 'vmovd']:
1130 return Condition(), Condition()
1133 'cpuid', 'hlt', 'lahf', 'sahf', 'rdtsc', 'pause',
1134 'sfence', 'lfence', 'mfence',
1136 'cmc', 'clc', 'cld', 'stc', 'std',
1137 'cwtl', 'cbtw', 'cltq', # CBW/CWDE/CDQE
1138 'cltd', 'cwtd', 'cqto', # CWD/CDQ/CQO
1141 return Condition(), Condition()
1143 elif re.match(r'mov[sz][bwl][lqw]$', name): # MOVSX, MOVSXD, MOVZX
1144 return Condition(), Condition()
1146 elif name == 'bswap':
1147 if ops[0] not in REGS32:
1148 raise SandboxingError(
1149 'bswap is only allowed with 32-bit operands',
1151 return Condition(), Condition()
1153 elif re.match(r'(cmov|set)%s$' % _CONDITION_SUFFIX_RE, name):
1154 return Condition(), Condition()
1156 elif name in _X87_INSTRUCTIONS:
1157 return Condition(), Condition()
1159 elif name in _MMX_INSTRUCTIONS:
1160 return Condition(), Condition()
1162 elif name in _XMM_AVX_INSTRUCTIONS:
1163 return Condition(), Condition()
1165 elif name in ['maskmovq', 'maskmovdqu', 'vmaskmovdqu']:
1166 # In 64-bit mode these instructions are processed in
1167 # ValidateSuperinstruction64, together with string instructions.
1168 return Condition(), Condition()
1171 raise DoNotMatchError(instruction)
1174 precondition = Condition()
1175 postcondition = Condition()
1176 zero_extending = False
1177 touches_memory = True
1179 # Here we determine which operands instruction writes to. Note that for
1180 # our purposes writes are only relevant when they either have potential to
1181 # zero-extend regular register, or can modify protected registers (r15,
1183 # This means that we don't have to worry about implicit operands (for
1184 # example it does not matter to us that mul writes to rdx and rax).
1186 if (_InstructionNameIn(
1188 'mov', # including MOVQ
1191 'add', 'sub', 'and', 'or', 'xor']) or
1192 name in ['movd', 'vmovd', 'vmovq']):
1193 # Technically, movabs is not allowed, but it's ok to accept it here,
1194 # because it will later be rejected because of improper memory access.
1195 # On the other hand, because of objdump quirk it prints regular
1196 # mov with 64-bit immediate as movabs:
1197 # 48 b8 00 00 00 00 00 00 00 00
1199 assert len(ops) == 2
1200 zero_extending = True
1201 write_ops = [ops[1]]
1203 elif re.match(r'mov[sz][bwl][lqw]$', name): # MOVSX, MOVSXD, MOVZX
1204 assert len(ops) == 2
1205 zero_extending = True
1206 write_ops = [ops[1]]
1208 elif _InstructionNameIn(name, ['xchg', 'xadd']):
1209 assert len(ops) == 2
1210 zero_extending = True
1213 elif _InstructionNameIn(name, ['inc', 'dec', 'neg', 'not']):
1214 assert len(ops) == 1
1215 zero_extending = True
1218 elif _InstructionNameIn(name, [
1219 'shl', 'shr', 'sar', 'rol', 'ror', 'rcl', 'rcr']):
1220 assert len(ops) in [1, 2]
1221 write_ops = [ops[-1]]
1223 elif _InstructionNameIn(name, ['shld', 'shrd']):
1224 assert len(ops) == 3
1225 write_ops = [ops[2]]
1227 elif _InstructionNameIn(name, [
1228 'pop', 'cmpxchg8b', 'cmpxchg16b']):
1229 assert len(ops) == 1
1233 assert len(ops) == 2
1234 write_ops = [ops[1]]
1235 touches_memory = False
1236 zero_extending = True
1238 elif _InstructionNameIn(name, ['nop']):
1239 assert len(ops) in [0, 1]
1241 touches_memory = False
1244 'prefetch', 'prefetchnta', 'prefetcht0', 'prefetcht1', 'prefetcht2',
1246 assert len(ops) == 1
1248 touches_memory = False
1250 elif _InstructionNameIn(
1252 ['adc', 'sbb', 'bsf', 'bsr',
1253 'lzcnt', 'tzcnt', 'popcnt', 'crc32', 'cmpxchg',
1255 'movmskpd', 'movmskps', 'movnti']):
1256 assert len(ops) == 2
1257 write_ops = [ops[1]]
1259 elif _InstructionNameIn(name, ['btc', 'btr', 'bts', 'bt']):
1260 assert len(ops) == 2
1261 # bt* accept arbitrarily large bit offset when second
1262 # operand is memory and offset is in register.
1263 # Interestingly, when offset is immediate, it's taken modulo operand size,
1264 # even when second operand is memory.
1265 # Also, validator currently disallows
1266 # bt* <register>, <register>
1267 # which is technically safe. We disallow it in spec as well for
1269 if not re.match(_ImmediateRE() + r'$', ops[0]):
1270 raise SandboxingError(
1271 'bt* is only allowed with immediate as bit offset',
1273 if _InstructionNameIn(name, ['bt']):
1276 write_ops = [ops[1]]
1278 elif _InstructionNameIn(name, ['cmp', 'test']):
1279 assert len(ops) == 2
1282 elif name == 'bswap':
1283 assert len(ops) == 1
1284 if ops[0] not in REGS32 + REGS64:
1285 raise SandboxingError(
1286 'bswap is only allowed with 32-bit and 64-bit operands',
1291 'cpuid', 'hlt', 'lahf', 'sahf', 'rdtsc', 'pause',
1292 'sfence', 'lfence', 'mfence',
1293 'cmc', 'clc', 'cld', 'stc', 'std',
1294 'cwtl', 'cbtw', 'cltq', # CBW/CWDE/CDQE
1295 'cltd', 'cwtd', 'cqto', # CWD/CDQ/CQO
1298 assert len(ops) == 0
1301 elif _InstructionNameIn(name, ['imul']):
1305 zero_extending = True
1306 write_ops = [ops[1]]
1308 zero_extending = True
1309 write_ops = [ops[2]]
1313 elif _InstructionNameIn(name, ['mul', 'div', 'idiv', 'push']):
1314 assert len(ops) == 1
1317 elif re.match(r'cmov%s$' % _CONDITION_SUFFIX_RE, name):
1318 assert len(ops) == 2
1319 write_ops = [ops[1]]
1321 elif re.match(r'set%s$' % _CONDITION_SUFFIX_RE, name):
1322 assert len(ops) == 1
1325 elif name in _X87_INSTRUCTIONS:
1326 assert 0 <= len(ops) <= 2
1327 # Actually, x87 instructions can write to x87 registers and to memory,
1328 # and there is even one instruction (fstsw/fnstsw) that writes to ax.
1329 # But these writes do not matter for sandboxing purposes.
1332 elif name in _MMX_INSTRUCTIONS:
1333 assert 0 <= len(ops) <= 3
1334 write_ops = ops[-1:]
1336 elif name in _XMM_AVX_INSTRUCTIONS:
1337 assert 0 <= len(ops) <= 5
1338 write_ops = ops[-1:]
1341 raise DoNotMatchError(instruction)
1344 precondition = _ProcessMemoryAccess(instruction, ops)
1346 postcondition = _ProcessOperandWrites(
1347 instruction, write_ops, zero_extending)
1349 return precondition, postcondition
1352 assert False, bitness
1355 def ValidateDirectJump(instruction, bitness):
1356 assert bitness in [32, 64]
1357 cond_jumps_re = re.compile(
1359 r'(?P<name>j%s|loop(n?e)?|j[er]?cxz)(?P<branch_hint>,p[nt])? %s$'
1360 % (_CONDITION_SUFFIX_RE, _HexRE('destination')))
1361 m = cond_jumps_re.match(instruction.disasm)
1363 if (m.group('name') == 'jcxz' or
1364 (m.group('name') == 'jecxz' and bitness == 64)):
1365 raise SandboxingError('disallowed form of jcxz instruction', instruction)
1367 if (m.group('name').startswith('loop') and
1368 m.group('branch_hint') is not None):
1369 raise SandboxingError(
1370 'branch hints are not allowed with loop instruction', instruction)
1371 # Unfortunately we can't rely on presence of 'data16' prefix in disassembly,
1372 # because neither nacl-objdump nor objdump we base our decoder print it.
1373 # So we look at bytes.
1374 if 0x66 in _GetLegacyPrefixes(instruction):
1375 raise SandboxingError(
1376 '16-bit conditional jumps are disallowed', instruction)
1377 return int(m.group('destination'), 16)
1379 jumps_re = re.compile(r'(jmp|call)(|w|q) %s$' % _HexRE('destination'))
1380 m = jumps_re.match(instruction.disasm)
1382 if m.group(2) == 'w':
1383 raise SandboxingError('16-bit jumps are disallowed', instruction)
1384 return int(m.group('destination'), 16)
1386 raise DoNotMatchError(instruction)
1389 def ValidateDirectJumpOrRegularInstruction(instruction, bitness):
1390 """Validate anything that is not superinstruction.
1393 instruction: objdump_parser.Instruction tuple.
1396 Triple (jump_destination, precondition, postcondition).
1397 jump_destination is either absolute offset or None if instruction is not
1398 jump. Pre/postconditions are as in ValidateRegularInstructions.
1400 According to usual convention.
1402 assert bitness in [32, 64]
1404 destination = ValidateDirectJump(instruction, bitness)
1405 return destination, Condition(), Condition()
1406 except DoNotMatchError:
1409 precondition, postcondition = ValidateRegularInstruction(instruction, bitness)
1410 return None, precondition, postcondition
1413 def ValidateSuperinstruction32(superinstruction):
1414 """Validate superinstruction with ia32 set of regexps.
1416 If set of instructions includes something unknown (unknown functions
1417 or prefixes, wrong number of instructions, etc), then assert is triggered.
1419 There is a corner case: naclcall/nacljmp instruction sequences are too
1420 complex to process by DFA alone (it produces too large DFA and MSVC chokes
1421 on it) thus it's verified partially by DFA and partially by code in
1422 actions. For these we generate either "True" or "False".
1425 superinstruction: list of objdump_parser.Instruction tuples
1428 call_jmp = re.compile(
1429 r'(call|jmp) ' # call or jmp
1430 r'[*](?P<register>%e[a-z]+)$') # register name
1432 # TODO(shcherbina): actually we only want to allow 0xffffffe0 as a mask,
1433 # but it's safe anyway because what really matters is that lower 5 bits
1434 # of the mask are zeroes.
1435 # Disallow 0xe0 once
1436 # https://code.google.com/p/nativeclient/issues/detail?id=3164 is fixed.
1437 and_for_call_jmp = re.compile(
1438 r'and [$]0x(ffffff)?e0,(?P<register>%e[a-z]+)$')
1440 dangerous_instruction = superinstruction[-1].disasm
1442 if call_jmp.match(dangerous_instruction):
1443 # If "dangerous instruction" is call or jmp then we need to check if two
1446 if len(superinstruction) != 2:
1447 raise DoNotMatchError(superinstruction)
1449 m = and_for_call_jmp.match(superinstruction[0].disasm)
1451 raise DoNotMatchError(superinstruction)
1452 register_and = m.group('register')
1454 m = call_jmp.match(dangerous_instruction)
1456 raise DoNotMatchError(superinstruction)
1457 register_call_jmp = m.group('register')
1459 if register_and == register_call_jmp:
1460 for instruction in superinstruction:
1461 _GetLegacyPrefixes(instruction) # to detect repeated prefixes
1464 raise SandboxingError(
1465 'nacljump32/naclcall32: {0} != {1}'.format(
1466 register_and, register_call_jmp),
1469 raise DoNotMatchError(superinstruction)
1472 def ValidateSuperinstruction64(superinstruction):
1473 """Validate superinstruction with x86-64 set of regexps.
1475 If set of instructions includes something unknown (unknown functions
1476 or prefixes, wrong number of instructions, etc), then assert is triggered.
1478 There is a corner case: naclcall/nacljmp instruction sequences are too
1479 complex to process by DFA alone (it produces too large DFA and MSVC chokes
1480 on it) thus it's verified partially by DFA and partially by code in
1481 actions. For these we generate either "True" or "False", other
1482 superinstruction always produce "True" or throw an error.
1485 superinstruction: list of objdump_parser.Instruction tuples
1488 dangerous_instruction = superinstruction[-1].disasm
1490 # These are the dangerous instructions in naclcall/nacljmp
1491 callq_jmpq = re.compile(
1492 r'(callq|jmpq) ' # callq or jmpq
1493 r'[*](?P<register>%r[0-9a-z]+)$') # register name
1494 # These are sandboxing instructions for naclcall/nacljmp
1495 # TODO(shcherbina): actually we only want to allow 0xffffffe0 as a mask,
1496 # but it's safe anyway because what really matters is that lower 5 bits
1497 # of the mask are zeroes.
1498 # Disallow 0xe0 once
1499 # https://code.google.com/p/nativeclient/issues/detail?id=3164 is fixed.
1500 and_for_callq_jmpq = re.compile(
1501 r'and [$]0x(f)*e0,(?P<register>%e[a-z][a-z]|%r[89]d|%r1[0-4]d)$')
1502 add_for_callq_jmpq = re.compile(
1503 r'add %r15,(?P<register>%r[0-9a-z]+)$')
1505 if callq_jmpq.match(dangerous_instruction):
1506 # If "dangerous instruction" is callq or jmpq then we need to check if all
1509 if len(superinstruction) != 3:
1510 raise DoNotMatchError(superinstruction)
1512 m = and_for_callq_jmpq.match(superinstruction[0].disasm)
1514 raise DoNotMatchError(superinstruction)
1515 register_and = m.group('register')
1517 m = add_for_callq_jmpq.match(superinstruction[1].disasm)
1519 raise DoNotMatchError(superinstruction)
1520 register_add = m.group('register')
1522 m = callq_jmpq.match(dangerous_instruction)
1524 raise DoNotMatchError(superinstruction)
1525 register_callq_jmpq = m.group('register')
1527 # Double-check that registers are 32-bit and convert them to 64-bit so
1528 # they can be compared
1529 if register_and[1] == 'e':
1530 register_and = '%r' + register_and[2:]
1531 elif re.match(r'%r\d+d', register_and):
1532 register_and = register_and[:-1]
1534 assert False, ('Unknown (or possible non-32-bit) register found. '
1535 'This should never happen!')
1536 if register_and == register_add == register_callq_jmpq:
1537 for instruction in superinstruction:
1538 _GetLegacyPrefixes(instruction) # to detect repeated prefixes
1541 raise SandboxingError(
1542 'nacljump64/naclcall64: registers do not match ({0}, {1}, {2})'.format(
1543 register_and, register_add, register_callq_jmpq),
1546 raise DoNotMatchError(superinstruction)
1548 # These are dangerous string instructions (there are three cases)
1549 string_instruction_rdi_no_rsi = re.compile(
1550 r'(maskmovq %mm[0-7],%mm[0-7]|' # maskmovq
1551 r'v?maskmovdqu %xmm([0-9]|1[0-5]),%xmm([0-9]|1[0-5])|' # [v]maskmovdqu
1552 r'((repnz|repz) )?scas %es:[(]%rdi[)],(%al|%ax|%eax|%rax)|' # scas
1553 r'(rep )?stos (%al|%ax|%eax|%rax),%es:[(]%rdi[)])$') # stos
1554 string_instruction_rsi_no_rdi = re.compile(
1555 r'(rep )?lods %ds:[(]%rsi[)],(%al|%ax|%eax|%rax)$') # lods
1556 string_instruction_rsi_rdi = re.compile(
1557 r'(((repnz|repz) )?cmps[blqw] %es:[(]%rdi[)],%ds:[(]%rsi[)]|' # cmps
1558 r'(rep )?movs[blqw] %ds:[(]%rsi[)],%es:[(]%rdi[)])$') # movs
1559 # These are sandboxing instructions for string instructions
1560 mov_esi_esi = re.compile(r'mov %esi,%esi$')
1561 lea_r15_rsi_rsi = re.compile(r'lea [(]%r15,%rsi,1[)],%rsi$')
1562 mov_edi_edi = re.compile(r'mov %edi,%edi$')
1563 lea_r15_rdi_rdi = re.compile(r'lea [(]%r15,%rdi,1[)],%rdi$')
1565 if string_instruction_rsi_no_rdi.match(dangerous_instruction):
1566 if len(superinstruction) != 3:
1567 raise DoNotMatchError(superinstruction)
1568 if mov_esi_esi.match(superinstruction[0].disasm) is None:
1569 raise DoNotMatchError(superinstruction)
1570 if lea_r15_rsi_rsi.match(superinstruction[1].disasm) is None:
1571 raise DoNotMatchError(superinstruction)
1573 elif string_instruction_rdi_no_rsi.match(dangerous_instruction):
1574 if len(superinstruction) != 3:
1575 raise DoNotMatchError(superinstruction)
1576 if mov_edi_edi.match(superinstruction[0].disasm) is None:
1577 raise DoNotMatchError(superinstruction)
1578 if lea_r15_rdi_rdi.match(superinstruction[1].disasm) is None:
1579 raise DoNotMatchError(superinstruction)
1581 elif string_instruction_rsi_rdi.match(dangerous_instruction):
1582 if len(superinstruction) != 5:
1583 raise DoNotMatchError(superinstruction)
1584 if mov_esi_esi.match(superinstruction[0].disasm) is None:
1585 raise DoNotMatchError(superinstruction)
1586 if lea_r15_rsi_rsi.match(superinstruction[1].disasm) is None:
1587 raise DoNotMatchError(superinstruction)
1588 if mov_edi_edi.match(superinstruction[2].disasm) is None:
1589 raise DoNotMatchError(superinstruction)
1590 if lea_r15_rdi_rdi.match(superinstruction[3].disasm) is None:
1591 raise DoNotMatchError(superinstruction)
1594 raise DoNotMatchError(superinstruction)
1596 for instruction in superinstruction:
1597 _GetLegacyPrefixes(instruction) # to detect repeated prefixes