2 # Copyright (c) 2013 The Native Client Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 # Executable specification of valid instructions and superinstructions (in terms
7 # of their disassembler listing).
8 # Should serve as formal and up-to-date ABI reference and as baseline for
9 # validator exhaustive tests.
11 # It is generally organized as a set of functions responsible for recognizing
12 # and validating specific patterns (jump instructions, regular instructions,
13 # superinstructions, etc.)
14 # There are three outcomes for running such function:
15 # - function raises DoNotMatchError (which means instruction is of completely
16 # different structure, for example when we call ValidateSuperinstruction on
18 # - function raises SandboxingError (which means instruction generally matches
19 # respective pattern, but some rules are violated)
20 # - function returns (which means instruction(s) is(are) safe)
22 # Why exceptions instead of returning False or something? Because they carry
23 # stack traces, which makes it easier to investigate why particular instruction
25 # Why distinguish DoNotMatchError and SandboxingError? Because on the topmost
26 # level we attempt to call all matchers and we need to see which error message
class DoNotMatchError(Exception):
  """Raised when an instruction is of completely different structure.

  A matcher raises it to say that the pattern it recognizes does not apply to
  the instruction(s) at all.
  """
class SandboxingError(Exception):
  """Raised when an instruction matches a pattern but violates its rules.

  The instruction generally has the structure the matcher expects, yet some
  sandboxing rule is broken.
  """
43 def _ValidateLongNop(instruction):
44 # Short nops do not require special exceptions (such as allowing repeated
45 # prefixes and segment access), so they are handled as regular instructions.
46 if re.match(r'nopw 0x0\(%[er]ax,%[er]ax,1\)$',
50 r'(data32 )*nopw %cs:0x0\(%[er]ax,%[er]ax,1\)$',
53 raise DoNotMatchError(instruction)
56 def _ValidateStringInstruction(instruction):
57 prefix_re = r'(rep |repz |repnz )?'
58 lods_re = r'lods %ds:\(%esi\),(%al|%ax|%eax)'
59 stos_re = r'stos (%al|%ax|%eax),%es:\(%edi\)'
60 scas_re = r'scas %es:\(%edi\),(%al|%ax|%eax)'
61 movs_re = r'movs[bwl] %ds:\(%esi\),%es:\(%edi\)'
62 cmps_re = r'cmps[bwl] %es:\(%edi\),%ds:\(%esi\)'
64 string_insn_re = '%s(%s)$' % (
66 '|'.join([lods_re, stos_re, scas_re, movs_re, cmps_re]))
68 if re.match(string_insn_re, instruction.disasm):
71 raise DoNotMatchError(instruction)
74 def _ValidateTlsInstruction(instruction):
75 if re.match(r'mov %gs:(0x0|0x4),%e[a-z][a-z]$', instruction.disasm):
78 raise DoNotMatchError(instruction)
# What can follow 'j' in conditional jumps 'je', 'jno', etc.
# Covers a/ae, b/be, g/ge, l/le, e/ne, o/no, p/np and s/ns.
_CONDITION_SUFFIX_RE = r'(a(e?)|b(e?)|g(e?)|l(e?)|(n?)e|(n?)o|(n?)p|(n?)s)'
85 def _AnyRegisterRE(group_name='register'):
86 # TODO(shcherbina): explicitly list all kinds of registers we care to
87 # distinguish for validation purposes.
88 return r'(?P<%s>%%(st\(\d+\)|\w+))' % group_name
91 def _HexRE(group_name='value'):
92 return r'(?P<%s>-?0x[\da-f]+)' % group_name
def _ImmediateRE(group_name='immediate'):
  """Return regex fragment that matches an immediate operand ('$' + hex number).

  Args:
    group_name: name of the group that captures the whole operand; the number
        without '$' is captured as group_name + '_value'.
  """
  number_re = _HexRE(group_name=group_name + '_value')
  return ('(?P<%s>' % group_name) + r'\$' + number_re + ')'
def _MemoryRE(group_name='memory'):
  """Return regex fragment that matches a memory operand.

  Every part is optional: a segment override ('%gs:'), a hexadecimal offset,
  and a parenthesized part holding an optional base register optionally
  followed by ',<index>,<scale digit>'.

  Args:
    group_name: name of the group that captures the whole operand. The parts
        are captured as group_name + '_segment', '_offset', '_base' and
        '_index'.
  """
  segment_re = r'(?P<%s_segment>%%[cdefgs]s:)?' % group_name
  offset_re = _HexRE(group_name=group_name + '_offset') + '?'
  base_re = _AnyRegisterRE(group_name=group_name + '_base') + '?'
  index_re = r'(,%s,\d)?' % _AnyRegisterRE(group_name=group_name + '_index')
  address_re = r'(\(' + base_re + index_re + r'\))?'
  return ('(?P<%s>' % group_name) + segment_re + offset_re + address_re + ')'
def _IndirectJumpTargetRE(group_name='target'):
  """Return regex fragment that matches '*' followed by register or memory.

  Args:
    group_name: name of the group that captures the whole target; the
        alternatives use group_name + '_register' and group_name + '_memory'.
  """
  register_re = _AnyRegisterRE(group_name=group_name + '_register')
  memory_re = _MemoryRE(group_name=group_name + '_memory')
  return (
      ('(?P<%s>' % group_name) + r'\*(' + register_re + '|' + memory_re + '))')
def _OperandRE(group_name='operand'):
  """Return regex fragment that matches any single operand.

  Alternatives are tried in this order: register, immediate, memory reference,
  indirect jump target.

  Args:
    group_name: name of the group that captures the whole operand; each
        alternative gets group_name plus '_register', '_immediate', '_memory'
        or '_target' as the prefix of its own group names.
  """
  alternatives = [
      _AnyRegisterRE(group_name=group_name + '_register'),
      _ImmediateRE(group_name=group_name + '_immediate'),
      _MemoryRE(group_name=group_name + '_memory'),
      _IndirectJumpTargetRE(group_name=group_name + '_target'),
  ]
  return ('(?P<%s>' % group_name) + '|'.join(alternatives) + ')'
def _SplitOps(insn, args):
  """Split operands part of disassembler listing into separate operands.

  We can't use just args.split(',') because operands can contain commas
  themselves, for example '(%r15,%rax,1)'.

  Args:
    insn: instruction being parsed (only used in assertion messages).
    args: operands text, for example '%eax,(%r15,%rbx,1)'.
  Returns:
    List of operands as strings, in the order they appear in args.
  """
  # We do not use mere re.match(_OperandRE(), args, i) here because
  # python backtracking regexes do not guarantee to find longest match.
  # The pattern does not depend on the position in args, so it is built and
  # compiled once rather than on every iteration.
  operand_re = re.compile(r'(%s)($|,)' % _OperandRE())

  ops = []
  i = 0
  while True:
    m = operand_re.match(args, i)
    assert m is not None, (args, i)
    ops.append(m.group(1))
    # Continue right after the operand itself: either at the end of args or
    # at the separating comma.
    i = m.end(1)
    if i == len(args):
      break
    assert args[i] == ',', (insn, args, i)
    i += 1
  return ops
152 def _ParseInstruction(instruction):
154 disasm, _, _ = instruction.disasm.partition('#')
155 elems = disasm.split()
158 raise SandboxingError(
159 'disasm is empty', instruction)
162 while elems != [] and elems[0] in [
163 'lock', 'rep', 'repz', 'repnz',
164 'data16', 'data32', 'addr16', 'addr32', 'addr64']:
165 prefixes.append(elems.pop(0))
168 raise SandboxingError(
169 'dangling legacy prefixes', instruction)
173 if re.match(r'rex([.]W?R?X?B?)?$', name):
174 raise SandboxingError('dangling rex prefix', instruction)
176 # There could be branching expectation information in instruction names:
179 name_re = r'[a-z]\w*(,p[nt])?$'
180 assert re.match(name_re, name) or name == "nop/reserved", name
184 elif len(elems) == 2:
185 ops = _SplitOps(instruction, elems[1])
187 assert False, instruction
189 return prefixes, name, ops
# Materialized as lists because they are concatenated with '+' elsewhere in
# this file (REGS32 + REGS64), which dictionary views do not support.
REGS32 = list(REG32_TO_REG64.keys())
REGS64 = list(REG32_TO_REG64.values())
class Condition(object):
  """Represents assertion about the state of 64-bit registers.

  (used as precondition and postcondition)

  Supported assertions:
    0. %rbp and %rsp are sandboxed (and nothing is known about other registers)
    1. {%rax} is restricted, %rbp and %rsp are sandboxed
    2-13. same for %rbx-%r14 not including %rbp and %rsp
    14. %rbp is restricted, %rsp is sandboxed
    15. %rsp is restricted, %rbp is sandboxed

  It can be observed that all assertions 1..15 differ from default 0 in a
  single register, which prompts internal representation of a single field,
  _restricted_register, which stores name of this standing out register
  (or None for the default assertion 0).

  * 'restricted' means higher 32 bits are zeroes
  * 'sandboxed' means within [%r15, %r15 + 2**32) range
  It goes without saying that %r15 is never changed and by definition sandboxed.
  """

  def __init__(self, restricted=None, restricted_instead_of_sandboxed=None):
    """Create assertion; at most one of the arguments may be given.

    Args:
      restricted: name of 64-bit register (other than %r15, %rbp, %rsp) that
          is restricted, or None.
      restricted_instead_of_sandboxed: '%rbp' or '%rsp' if that register is
          restricted instead of being sandboxed, or None.
    """
    self._restricted_register = None
    if restricted is not None:
      assert restricted_instead_of_sandboxed is None
      assert restricted in REGS64
      assert restricted not in ['%r15', '%rbp', '%rsp']
      self._restricted_register = restricted
    if restricted_instead_of_sandboxed is not None:
      assert restricted is None
      assert restricted_instead_of_sandboxed in ['%rbp', '%rsp']
      self._restricted_register = restricted_instead_of_sandboxed

  def GetAlteredRegisters(self):
    """ Return pair (restricted, restricted_instead_of_sandboxed).

    Each item is either register name or None.
    """
    if self._restricted_register is None:
      return None, None
    elif self._restricted_register in ['%rsp', '%rbp']:
      return None, self._restricted_register
    else:
      return self._restricted_register, None

  def __eq__(self, other):
    # Comparison with foreign objects is left to Python instead of failing
    # with AttributeError on the missing field.
    if not isinstance(other, Condition):
      return NotImplemented
    return self._restricted_register == other._restricted_register

  def __ne__(self, other):
    return not self == other

  def __hash__(self):
    # Defined together with __eq__ so that equal conditions hash equally.
    return hash(self._restricted_register)

  def Implies(self, other):
    """Return True if this assertion is sufficient for other to hold."""
    return self.WhyNotImplies(other) is None

  def WhyNotImplies(self, other):
    """Explain why this assertion does not imply other.

    Returns:
      None if this assertion implies other, explanation as a string otherwise.
    """
    if other._restricted_register is None:
      # Default assertion needs %rbp and %rsp sandboxed; restricted regular
      # register does not contradict that.
      if self._restricted_register in ['%rbp', '%rsp']:
        return '%s should not be restricted' % self._restricted_register
      else:
        return None
    else:
      if self._restricted_register != other._restricted_register:
        return (
            'register %s should be restricted, '
            'while in fact %r is restricted' % (
                other._restricted_register, self._restricted_register))
      else:
        return None

  def __repr__(self):
    if self._restricted_register is None:
      return 'Condition(default)'
    elif self._restricted_register in ['%rbp', '%rsp']:
      return ('Condition(%s restricted instead of sandboxed)'
              % self._restricted_register)
    else:
      return 'Condition(%s restricted)' % self._restricted_register
297 if reg not in ['%r15', '%rbp', '%rsp']:
298 yield Condition(restricted=reg)
299 yield Condition(restricted_instead_of_sandboxed='%rbp')
300 yield Condition(restricted_instead_of_sandboxed='%rsp')
def _ValidateSpecialStackInstruction(instruction):
  """Validate 64-bit instruction that is in special relationship with rsp/rbp.

  Args:
    instruction: object with disasm (disassembler listing) and bytes
        (list of code bytes) attributes.
  Returns:
    Pair (precondition, postcondition) of Condition instances.
  Raises:
    DoNotMatchError: if instruction is not one of the special forms.
    SandboxingError: if 'and <mask>,%rsp' has unsupported encoding or mask.
  """
  disasm = instruction.disasm

  if disasm == 'mov %rsp,%rbp' or disasm == 'mov %rbp,%rsp':
    return Condition(), Condition()

  and_match = re.match('and %s,%%rsp$' % _ImmediateRE(), disasm)
  if and_match is not None:
    # We only allow 1-byte immediate, so we have to look at machine code.
    code = instruction.bytes
    is_one_byte_immediate_form = (
        len(code) == 4 and
        0x48 <= code[0] <= 0x4f and
        code[1:3] == [0x83, 0xe4])
    if not is_one_byte_immediate_form:
      raise SandboxingError(
          'unsupported form of "and <mask>,%rsp" instruction', instruction)
    # We extract mask from bytes, not from textual representation, because
    # objdump and RDFA decoder print it differently
    # (-1 is displayed as '0xffffffffffffffff' by objdump and as '0xff' by
    # RDFA decoder).
    # See https://code.google.com/p/nativeclient/issues/detail?id=3164
    mask = code[3]
    assert mask == int(and_match.group('immediate_value'), 16) & 0xff
    if mask < 0x80:
      raise SandboxingError(
          'mask should be negative to ensure that higher '
          'bits of %rsp do not change',
          instruction)
    return Condition(), Condition()

  if (disasm == 'add %r15,%rbp' or
      re.match(r'lea (0x0+)?\(%rbp,%r15,1\),%rbp$', disasm)):
    return Condition(restricted_instead_of_sandboxed='%rbp'), Condition()

  if (disasm == 'add %r15,%rsp' or
      re.match(r'lea (0x0+)?\(%rsp,%r15,1\),%rsp$', disasm)):
    return Condition(restricted_instead_of_sandboxed='%rsp'), Condition()

  # TODO(shcherbina): disallow this instruction once
  # http://code.google.com/p/nativeclient/issues/detail?id=3070
  # is fixed.
  if disasm == 'or %r15,%rsp':
    return Condition(restricted_instead_of_sandboxed='%rsp'), Condition()

  raise DoNotMatchError(instruction)
351 def _GetLegacyPrefixes(instruction):
353 for b in instruction.bytes:
355 0x66, 0x67, 0x2e, 0x3e, 0x26, 0x64, 0x65, 0x36, 0xf0, 0xf3, 0xf2]:
358 raise SandboxingError('addr prefix is not allowed', instruction)
360 raise SandboxingError('duplicate legacy prefix', instruction)
365 def _ProcessMemoryAccess(instruction, operands):
366 """Make sure that memory access is valid and return precondition required.
368 (only makes sense for 64-bit instructions)
371 instruction: Instruction tuple
372 operands: list of instruction operands as strings, for example
373 ['%eax', '(%r15,%rbx,1)']
375 Condition object representing precondition required for memory access (if
376 it's present among operands) to be valid.
378 SandboxingError if memory access is invalid.
380 precondition = Condition()
382 m = re.match(_MemoryRE() + r'$', op)
384 assert m.group('memory_segment') is None
385 base = m.group('memory_base')
386 index = m.group('memory_index')
387 allowed_bases = ['%r15', '%rbp', '%rsp', '%rip']
388 if base not in allowed_bases:
389 raise SandboxingError(
390 'memory access only is allowed with base from %s'
393 if index is not None:
396 elif index in REGS64:
397 if index in ['%r15', '%rsp', '%rbp']:
398 raise SandboxingError(
399 '%s can\'t be used as index in memory access' % index,
402 assert precondition == Condition()
403 precondition = Condition(restricted=index)
405 raise SandboxingError(
406 'unrecognized register is used for memory access as index',
411 def _ProcessOperandWrites(instruction, write_operands, zero_extending=False):
412 """Check that writes to operands are valid, return postcondition established.
414 (only makes sense for 64-bit instructions)
417 instruction: Instruction tuple
418 write_operands: list of operands instruction writes to as strings,
419 for example ['%eax', '(%r15,%rbx,1)']
420 zero_extending: whether instruction is considered zero extending
422 Condition object representing postcondition established by operand writes.
424 SandboxingError if write is invalid.
426 postcondition = Condition()
427 for i, op in enumerate(write_operands):
428 if op in ['%r15', '%r15d', '%r15w', '%r15b']:
429 raise SandboxingError('changes to r15 are not allowed', instruction)
430 if op in ['%bpl', '%bp', '%rbp']:
431 raise SandboxingError('changes to rbp are not allowed', instruction)
432 if op in ['%spl', '%sp', '%rsp']:
433 raise SandboxingError('changes to rsp are not allowed', instruction)
436 # Only last of the operand writes is considered zero-extending.
439 # does not zero-extend %rax.
440 if zero_extending and i == len(write_operands) - 1:
441 r = REG32_TO_REG64[op]
442 if r in ['%rbp', '%rsp']:
443 postcondition = Condition(restricted_instead_of_sandboxed=r)
445 postcondition = Condition(restricted=r)
447 if op in ['%ebp', '%esp']:
448 raise SandboxingError(
449 'non-zero-extending changes to ebp and esp are not allowed',
455 def _InstructionNameIn(name, candidates):
456 return re.match('(%s)[bwlq]?$' % '|'.join(candidates), name) is not None
459 _X87_INSTRUCTIONS = set([
462 'fadd', 'fadds', 'faddl', 'faddp',
468 'fcmovb', 'fcmovbe', 'fcmove', 'fcmovnb',
469 'fcmovnbe', 'fcmovne', 'fcmovnu', 'fcmovu',
470 'fcom', 'fcoms', 'fcoml',
471 'fcomp', 'fcomps', 'fcompl',
477 'fdiv', 'fdivs', 'fdivl',
479 'fdivr', 'fdivrs', 'fdivrl',
487 'fild', 'fildl', 'fildll',
491 'fistp', 'fistpl', 'fistpll',
492 'fisttp', 'fisttpl', 'fisttpll',
493 'fld', 'flds', 'fldl', 'fldt',
503 'fmul', 'fmuls', 'fmull',
518 'fst', 'fsts', 'fstl',
519 'fstp', 'fstps', 'fstpl', 'fstpt',
523 'fsub', 'fsubs', 'fsubl',
525 'fsubr', 'fsubrs', 'fsubrl',
530 'fucom', 'fucomp', 'fucompp',
541 # Instructions from mmx_instructions.def (besides MMX, they include SSE2/3
542 # and other stuff that works with MMX registers).
543 _MMX_INSTRUCTIONS = set([
659 # Instructions from xmm_instructions.def (that is, instructions that work
660 # with XMM registers). These instruction names can be prepended with 'v', which
661 # results in their AVX counterpart.
662 _XMM_AVX_INSTRUCTIONS = set([
693 'cvtsi2sd', 'cvtsi2sdl', 'cvtsi2sdq',
694 'cvtsi2ss', 'cvtsi2ssl', 'cvtsi2ssq',
909 _XMM_AVX_INSTRUCTIONS.update(['v' + name for name in _XMM_AVX_INSTRUCTIONS])
911 _XMM_AVX_INSTRUCTIONS.update([
987 # Add instructions like VFMADDPD/VFMADD132PD/VFMADD213PD/VFMADD231PD.
1010 for operand_order_suffix in ['', '132', '213', '231']:
1011 _XMM_AVX_INSTRUCTIONS.add(fma_name % operand_order_suffix)
1013 for cmp_suffix in ['pd', 'ps', 'sd', 'ss']:
1014 for cmp_op in ['', 'eq', 'lt', 'le', 'unord', 'neq', 'nlt', 'nle', 'ord']:
1015 _XMM_AVX_INSTRUCTIONS.add('cmp%s%s' % (cmp_op, cmp_suffix))
1016 _XMM_AVX_INSTRUCTIONS.add('vcmp%s%s' % (cmp_op, cmp_suffix))
1018 'eq_uq', 'nge', 'ngt', 'false',
1019 'neq_oq', 'ge', 'gt', 'true',
1020 'eq_os', 'lt_oq', 'le_oq', 'unord_s',
1021 'neq_us', 'nlt_uq', 'nle_uq', 'ord_s',
1022 'eq_us', 'nge_uq', 'ngt_uq', 'false_os',
1023 'neq_os', 'ge_oq', 'gt_oq', 'true_us']:
1024 _XMM_AVX_INSTRUCTIONS.add('vcmp%s%s' % (cmp_op, cmp_suffix))
1027 def ValidateRegularInstruction(instruction, bitness):
1028 """Validate regular instruction (not direct jump).
1031 instruction: objdump_parser.Instruction tuple
1034 Pair (precondition, postcondition) of Condition instances.
1035 (for 32-bit case they are meaningless and are not used)
1037 According to usual convention.
1039 assert bitness in [32, 64]
1041 if instruction.disasm.startswith('.byte ') or '(bad)' in instruction.disasm:
1042 raise SandboxingError('objdump failed to decode', instruction)
1045 _ValidateLongNop(instruction)
1046 return Condition(), Condition()
1047 except DoNotMatchError:
1050 # Report error on duplicate prefixes (note that they are allowed in
1052 _GetLegacyPrefixes(instruction)
1056 _ValidateStringInstruction(instruction)
1057 return Condition(), Condition()
1058 except DoNotMatchError:
1062 _ValidateTlsInstruction(instruction)
1063 return Condition(), Condition()
1064 except DoNotMatchError:
1069 return _ValidateSpecialStackInstruction(instruction)
1070 except DoNotMatchError:
1073 prefixes, name, ops = _ParseInstruction(instruction)
1075 for prefix in prefixes:
1076 if prefix != 'lock':
1077 raise SandboxingError('prefix %s is not allowed' % prefix, instruction)
1080 if op in ['%cs', '%ds', '%es', '%ss', '%fs', '%gs']:
1081 raise SandboxingError(
1082 'access to segment registers is not allowed', instruction)
1083 if op.startswith('%cr'):
1084 raise SandboxingError(
1085 'access to control registers is not allowed', instruction)
1086 if op.startswith('%db'):
1087 raise SandboxingError(
1088 'access to debug registers is not allowed', instruction)
1089 if op.startswith('%tr'):
1090 raise SandboxingError(
1091 'access to test registers is not allowed', instruction)
1093 m = re.match(_MemoryRE() + r'$', op)
1094 if m is not None and m.group('memory_segment') is not None:
1095 raise SandboxingError(
1096 'segments in memory references are not allowed', instruction)
1100 if _InstructionNameIn(
1102 ['mov', # including MOVQ
1103 'add', 'sub', 'and', 'or', 'xor',
1105 'inc', 'dec', 'neg', 'not',
1106 'shl', 'shr', 'sar', 'rol', 'ror', 'rcl', 'rcr',
1111 'prefetch', 'prefetchnta', 'prefetcht0', 'prefetcht1', 'prefetcht2',
1113 'adc', 'sbb', 'bsf', 'bsr',
1114 'lzcnt', 'tzcnt', 'popcnt', 'crc32', 'cmpxchg',
1116 'movmskpd', 'movmskps', 'movnti',
1117 'btc', 'btr', 'bts', 'bt',
1119 'imul', 'mul', 'div', 'idiv', 'push',
1120 ]) or name in ['movd', 'vmovd']:
1121 return Condition(), Condition()
1124 'cpuid', 'hlt', 'lahf', 'sahf', 'rdtsc', 'pause',
1125 'sfence', 'lfence', 'mfence',
1127 'cmc', 'clc', 'cld', 'stc', 'std',
1128 'cwtl', 'cbtw', 'cltq', # CBW/CWDE/CDQE
1129 'cltd', 'cwtd', 'cqto', # CWD/CDQ/CQO
1132 return Condition(), Condition()
1134 elif re.match(r'mov[sz][bwl][lqw]$', name): # MOVSX, MOVSXD, MOVZX
1135 return Condition(), Condition()
1137 elif name == 'bswap':
1138 if ops[0] not in REGS32:
1139 raise SandboxingError(
1140 'bswap is only allowed with 32-bit operands',
1142 return Condition(), Condition()
1144 elif re.match(r'(cmov|set)%s$' % _CONDITION_SUFFIX_RE, name):
1145 return Condition(), Condition()
1147 elif name in _X87_INSTRUCTIONS:
1148 return Condition(), Condition()
1150 elif name in _MMX_INSTRUCTIONS:
1151 return Condition(), Condition()
1153 elif name in _XMM_AVX_INSTRUCTIONS:
1154 return Condition(), Condition()
1156 elif name in ['maskmovq', 'maskmovdqu', 'vmaskmovdqu']:
1157 # In 64-bit mode these instructions are processed in
1158 # ValidateSuperinstruction64, together with string instructions.
1159 return Condition(), Condition()
1162 raise DoNotMatchError(instruction)
1165 precondition = Condition()
1166 postcondition = Condition()
1167 zero_extending = False
1168 touches_memory = True
1170 # Here we determine which operands instruction writes to. Note that for
1171 # our purposes writes are only relevant when they either have potential to
1172 # zero-extend regular register, or can modify protected registers (r15,
1174 # This means that we don't have to worry about implicit operands (for
1175 # example it does not matter to us that mul writes to rdx and rax).
1177 if (_InstructionNameIn(
1179 'mov', # including MOVQ
1182 'add', 'sub', 'and', 'or', 'xor']) or
1183 name in ['movd', 'vmovd', 'vmovq']):
1184 # Technically, movabs is not allowed, but it's ok to accept it here,
1185 # because it will later be rejected because of improper memory access.
1186 # On the other hand, because of objdump quirk it prints regular
1187 # mov with 64-bit immediate as movabs:
1188 # 48 b8 00 00 00 00 00 00 00 00
1190 assert len(ops) == 2
1191 zero_extending = True
1192 write_ops = [ops[1]]
1194 elif re.match(r'mov[sz][bwl][lqw]$', name): # MOVSX, MOVSXD, MOVZX
1195 assert len(ops) == 2
1196 zero_extending = True
1197 write_ops = [ops[1]]
1199 elif _InstructionNameIn(name, ['xchg', 'xadd']):
1200 assert len(ops) == 2
1201 zero_extending = True
1204 elif _InstructionNameIn(name, ['inc', 'dec', 'neg', 'not']):
1205 assert len(ops) == 1
1206 zero_extending = True
1209 elif _InstructionNameIn(name, [
1210 'shl', 'shr', 'sar', 'rol', 'ror', 'rcl', 'rcr']):
1211 assert len(ops) in [1, 2]
1212 write_ops = [ops[-1]]
1214 elif _InstructionNameIn(name, ['shld', 'shrd']):
1215 assert len(ops) == 3
1216 write_ops = [ops[2]]
1218 elif _InstructionNameIn(name, [
1219 'pop', 'cmpxchg8b', 'cmpxchg16b']):
1220 assert len(ops) == 1
1224 assert len(ops) == 2
1225 write_ops = [ops[1]]
1226 touches_memory = False
1227 zero_extending = True
1229 elif _InstructionNameIn(name, ['nop']):
1230 assert len(ops) in [0, 1]
1232 touches_memory = False
1235 'prefetch', 'prefetchnta', 'prefetcht0', 'prefetcht1', 'prefetcht2',
1237 assert len(ops) == 1
1239 touches_memory = False
1241 elif _InstructionNameIn(
1243 ['adc', 'sbb', 'bsf', 'bsr',
1244 'lzcnt', 'tzcnt', 'popcnt', 'crc32', 'cmpxchg',
1246 'movmskpd', 'movmskps', 'movnti']):
1247 assert len(ops) == 2
1248 write_ops = [ops[1]]
1250 elif _InstructionNameIn(name, ['btc', 'btr', 'bts', 'bt']):
1251 assert len(ops) == 2
1252 # bt* accept arbitrarily large bit offset when second
1253 # operand is memory and offset is in register.
1254 # Interestingly, when offset is immediate, it's taken modulo operand size,
1255 # even when second operand is memory.
1256 # Also, validator currently disallows
1257 # bt* <register>, <register>
1258 # which is technically safe. We disallow it in spec as well for
1260 if not re.match(_ImmediateRE() + r'$', ops[0]):
1261 raise SandboxingError(
1262 'bt* is only allowed with immediate as bit offset',
1264 if _InstructionNameIn(name, ['bt']):
1267 write_ops = [ops[1]]
1269 elif _InstructionNameIn(name, ['cmp', 'test']):
1270 assert len(ops) == 2
1273 elif name == 'bswap':
1274 assert len(ops) == 1
1275 if ops[0] not in REGS32 + REGS64:
1276 raise SandboxingError(
1277 'bswap is only allowed with 32-bit and 64-bit operands',
1282 'cpuid', 'hlt', 'lahf', 'sahf', 'rdtsc', 'pause',
1283 'sfence', 'lfence', 'mfence',
1284 'cmc', 'clc', 'cld', 'stc', 'std',
1285 'cwtl', 'cbtw', 'cltq', # CBW/CWDE/CDQE
1286 'cltd', 'cwtd', 'cqto', # CWD/CDQ/CQO
1289 assert len(ops) == 0
1292 elif _InstructionNameIn(name, ['imul']):
1296 zero_extending = True
1297 write_ops = [ops[1]]
1299 zero_extending = True
1300 write_ops = [ops[2]]
1304 elif _InstructionNameIn(name, ['mul', 'div', 'idiv', 'push']):
1305 assert len(ops) == 1
1308 elif re.match(r'cmov%s$' % _CONDITION_SUFFIX_RE, name):
1309 assert len(ops) == 2
1310 write_ops = [ops[1]]
1312 elif re.match(r'set%s$' % _CONDITION_SUFFIX_RE, name):
1313 assert len(ops) == 1
1316 elif name in _X87_INSTRUCTIONS:
1317 assert 0 <= len(ops) <= 2
1318 # Actually, x87 instructions can write to x87 registers and to memory,
1319 # and there is even one instruction (fstsw/fnstsw) that writes to ax.
1320 # But these writes do not matter for sandboxing purposes.
1323 elif name in _MMX_INSTRUCTIONS:
1324 assert 0 <= len(ops) <= 3
1325 write_ops = ops[-1:]
1327 elif name in _XMM_AVX_INSTRUCTIONS:
1328 assert 0 <= len(ops) <= 5
1329 write_ops = ops[-1:]
1332 raise DoNotMatchError(instruction)
1335 precondition = _ProcessMemoryAccess(instruction, ops)
1337 postcondition = _ProcessOperandWrites(
1338 instruction, write_ops, zero_extending)
1340 return precondition, postcondition
1343 assert False, bitness
1346 def ValidateDirectJump(instruction, bitness):
1347 assert bitness in [32, 64]
1348 cond_jumps_re = re.compile(
1350 r'(?P<name>j%s|loop(n?e)?|j[er]?cxz)(?P<branch_hint>,p[nt])? %s$'
1351 % (_CONDITION_SUFFIX_RE, _HexRE('destination')))
1352 m = cond_jumps_re.match(instruction.disasm)
1354 if (m.group('name') == 'jcxz' or
1355 (m.group('name') == 'jecxz' and bitness == 64)):
1356 raise SandboxingError('disallowed form of jcxz instruction', instruction)
1358 if (m.group('name').startswith('loop') and
1359 m.group('branch_hint') is not None):
1360 raise SandboxingError(
1361 'branch hints are not allowed with loop instruction', instruction)
1362 # Unfortunately we can't rely on presence of 'data16' prefix in disassembly,
1363 # because neither nacl-objdump nor objdump we base our decoder print it.
1364 # So we look at bytes.
1365 if 0x66 in _GetLegacyPrefixes(instruction):
1366 raise SandboxingError(
1367 '16-bit conditional jumps are disallowed', instruction)
1368 return int(m.group('destination'), 16)
1370 jumps_re = re.compile(r'(jmp|call)(|w|q) %s$' % _HexRE('destination'))
1371 m = jumps_re.match(instruction.disasm)
1373 if m.group(2) == 'w':
1374 raise SandboxingError('16-bit jumps are disallowed', instruction)
1375 return int(m.group('destination'), 16)
1377 raise DoNotMatchError(instruction)
def ValidateDirectJumpOrRegularInstruction(instruction, bitness):
  """Validate anything that is not superinstruction.

  Args:
    instruction: objdump_parser.Instruction tuple.
    bitness: 32 or 64.
  Returns:
    Triple (jump_destination, precondition, postcondition).
    jump_destination is either absolute offset or None if instruction is not
    jump. Pre/postconditions are as in ValidateRegularInstruction.
  Raises:
    According to usual convention.
  """
  assert bitness in [32, 64]

  try:
    jump_destination = ValidateDirectJump(instruction, bitness)
  except DoNotMatchError:
    is_direct_jump = False
  else:
    is_direct_jump = True

  if is_direct_jump:
    return jump_destination, Condition(), Condition()

  # Not a direct jump, so it is validated as regular instruction (outside of
  # the except clause, as the failed jump match is of no interest anymore).
  conditions = ValidateRegularInstruction(instruction, bitness)
  precondition, postcondition = conditions
  return None, precondition, postcondition
1404 def ValidateSuperinstruction32(superinstruction):
1405 """Validate superinstruction with ia32 set of regexps.
1407 If set of instructions includes something unknown (unknown functions
1408 or prefixes, wrong number of instructions, etc), then assert is triggered.
1410 One corner case exists: naclcall/nacljmp instruction sequences are too
1411 complex to process by DFA alone (it produces too large DFA and MSVC chokes
1412 on it) thus it's verified partially by DFA and partially by code in
1413 actions. For these we generate either "True" or "False".
1416 superinstruction: list of objdump_parser.Instruction tuples
1419 call_jmp = re.compile(
1420 r'(call|jmp) ' # call or jmp
1421 r'[*](?P<register>%e[a-z]+)$') # register name
1423 # TODO(shcherbina): actually we only want to allow 0xffffffe0 as a mask,
1424 # but it's safe anyway because what really matters is that lower 5 bits
1425 # of the mask are zeroes.
1426 # Disallow 0xe0 once
1427 # https://code.google.com/p/nativeclient/issues/detail?id=3164 is fixed.
1428 and_for_call_jmp = re.compile(
1429 r'and [$]0x(ffffff)?e0,(?P<register>%e[a-z]+)$')
1431 dangerous_instruction = superinstruction[-1].disasm
1433 if call_jmp.match(dangerous_instruction):
1434 # If "dangerous instruction" is call or jmp then we need to check if two
1437 if len(superinstruction) != 2:
1438 raise DoNotMatchError(superinstruction)
1440 m = and_for_call_jmp.match(superinstruction[0].disasm)
1442 raise DoNotMatchError(superinstruction)
1443 register_and = m.group('register')
1445 m = call_jmp.match(dangerous_instruction)
1447 raise DoNotMatchError(superinstruction)
1448 register_call_jmp = m.group('register')
1450 if register_and == register_call_jmp:
1451 for instruction in superinstruction:
1452 _GetLegacyPrefixes(instruction) # to detect repeated prefixes
1455 raise SandboxingError(
1456 'nacljump32/naclcall32: {0} != {1}'.format(
1457 register_and, register_call_jmp),
1460 raise DoNotMatchError(superinstruction)
1463 def ValidateSuperinstruction64(superinstruction):
1464 """Validate superinstruction with x86-64 set of regexps.
1466 If set of instructions includes something unknown (unknown functions
1467 or prefixes, wrong number of instructions, etc), then assert is triggered.
1469 One corner case exists: naclcall/nacljmp instruction sequences are too
1470 complex to process by DFA alone (it produces too large DFA and MSVC chokes
1471 on it) thus it's verified partially by DFA and partially by code in
1472 actions. For these we generate either "True" or "False", other
1473 superinstruction always produce "True" or throw an error.
1476 superinstruction: list of objdump_parser.Instruction tuples
1479 dangerous_instruction = superinstruction[-1].disasm
1481 # These are dangerous instructions in naclcall/nacljmp
1482 callq_jmpq = re.compile(
1483 r'(callq|jmpq) ' # callq or jmpq
1484 r'[*](?P<register>%r[0-9a-z]+)$') # register name
1485 # These are sandboxing instructions for naclcall/nacljmp
1486 # TODO(shcherbina): actually we only want to allow 0xffffffe0 as a mask,
1487 # but it's safe anyway because what really matters is that lower 5 bits
1488 # of the mask are zeroes.
1489 # Disallow 0xe0 once
1490 # https://code.google.com/p/nativeclient/issues/detail?id=3164 is fixed.
1491 and_for_callq_jmpq = re.compile(
1492 r'and [$]0x(f)*e0,(?P<register>%e[a-z][a-z]|%r[89]d|%r1[0-4]d)$')
1493 add_for_callq_jmpq = re.compile(
1494 r'add %r15,(?P<register>%r[0-9a-z]+)$')
1496 if callq_jmpq.match(dangerous_instruction):
1497 # If "dangerous instruction" is callq or jmpq then we need to check if all
1500 if len(superinstruction) != 3:
1501 raise DoNotMatchError(superinstruction)
1503 m = and_for_callq_jmpq.match(superinstruction[0].disasm)
1505 raise DoNotMatchError(superinstruction)
1506 register_and = m.group('register')
1508 m = add_for_callq_jmpq.match(superinstruction[1].disasm)
1510 raise DoNotMatchError(superinstruction)
1511 register_add = m.group('register')
1513 m = callq_jmpq.match(dangerous_instruction)
1515 raise DoNotMatchError(superinstruction)
1516 register_callq_jmpq = m.group('register')
1518 # Double-check that registers are 32-bit and convert them to 64-bit so
1519 # they can be compared
1520 if register_and[1] == 'e':
1521 register_and = '%r' + register_and[2:]
1522 elif re.match(r'%r\d+d', register_and):
1523 register_and = register_and[:-1]
1525 assert False, ('Unknown (or possible non-32-bit) register found. '
1526 'This should never happen!')
1527 if register_and == register_add == register_callq_jmpq:
1528 for instruction in superinstruction:
1529 _GetLegacyPrefixes(instruction) # to detect repeated prefixes
1532 raise SandboxingError(
1533 'nacljump64/naclcall64: registers do not match ({0}, {1}, {2})'.format(
1534 register_and, register_add, register_callq_jmpq),
1537 raise DoNotMatchError(superinstruction)
1539 # These are dangerous string instructions (there are three cases)
1540 string_instruction_rdi_no_rsi = re.compile(
1541 r'(maskmovq %mm[0-7],%mm[0-7]|' # maskmovq
1542 r'v?maskmovdqu %xmm([0-9]|1[0-5]),%xmm([0-9]|1[0-5])|' # [v]maskmovdqu
1543 r'((repnz|repz) )?scas %es:[(]%rdi[)],(%al|%ax|%eax|%rax)|' # scas
1544 r'(rep )?stos (%al|%ax|%eax|%rax),%es:[(]%rdi[)])$') # stos
1545 string_instruction_rsi_no_rdi = re.compile(
1546 r'(rep )?lods %ds:[(]%rsi[)],(%al|%ax|%eax|%rax)$') # lods
1547 string_instruction_rsi_rdi = re.compile(
1548 r'(((repnz|repz) )?cmps[blqw] %es:[(]%rdi[)],%ds:[(]%rsi[)]|' # cmps
1549 r'(rep )?movs[blqw] %ds:[(]%rsi[)],%es:[(]%rdi[)])$') # movs
1550 # These are sandboxing instructions for string instructions
1551 mov_esi_esi = re.compile(r'mov %esi,%esi$')
1552 lea_r15_rsi_rsi = re.compile(r'lea [(]%r15,%rsi,1[)],%rsi$')
1553 mov_edi_edi = re.compile(r'mov %edi,%edi$')
1554 lea_r15_rdi_rdi = re.compile(r'lea [(]%r15,%rdi,1[)],%rdi$')
1556 if string_instruction_rsi_no_rdi.match(dangerous_instruction):
1557 if len(superinstruction) != 3:
1558 raise DoNotMatchError(superinstruction)
1559 if mov_esi_esi.match(superinstruction[0].disasm) is None:
1560 raise DoNotMatchError(superinstruction)
1561 if lea_r15_rsi_rsi.match(superinstruction[1].disasm) is None:
1562 raise DoNotMatchError(superinstruction)
1564 elif string_instruction_rdi_no_rsi.match(dangerous_instruction):
1565 if len(superinstruction) != 3:
1566 raise DoNotMatchError(superinstruction)
1567 if mov_edi_edi.match(superinstruction[0].disasm) is None:
1568 raise DoNotMatchError(superinstruction)
1569 if lea_r15_rdi_rdi.match(superinstruction[1].disasm) is None:
1570 raise DoNotMatchError(superinstruction)
1571 # vmaskmovdqu is disabled for compatibility with the previous validator
1572 if dangerous_instruction.startswith('vmaskmovdqu '):
1573 raise SandboxingError('vmaskmovdqu is disallowed', superinstruction)
1575 elif string_instruction_rsi_rdi.match(dangerous_instruction):
1576 if len(superinstruction) != 5:
1577 raise DoNotMatchError(superinstruction)
1578 if mov_esi_esi.match(superinstruction[0].disasm) is None:
1579 raise DoNotMatchError(superinstruction)
1580 if lea_r15_rsi_rsi.match(superinstruction[1].disasm) is None:
1581 raise DoNotMatchError(superinstruction)
1582 if mov_edi_edi.match(superinstruction[2].disasm) is None:
1583 raise DoNotMatchError(superinstruction)
1584 if lea_r15_rdi_rdi.match(superinstruction[3].disasm) is None:
1585 raise DoNotMatchError(superinstruction)
1588 raise DoNotMatchError(superinstruction)
1590 for instruction in superinstruction:
1591 _GetLegacyPrefixes(instruction) # to detect repeated prefixes