1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "src/assembler.h"
9 #include "src/regexp-macro-assembler.h"
10 #include "src/regexp-stack.h"
11 #include "src/simulator.h"
// Constructs the base macro assembler. The initializers visible here start
// the object with the slow-safe compiler disabled and the global mode set to
// NOT_GLOBAL.
16 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
17 : slow_safe_compiler_(false),
18 global_mode_(NOT_GLOBAL),
// Destructor of the base macro assembler.
23 RegExpMacroAssembler::~RegExpMacroAssembler() {
// Reports whether unaligned memory reads may be used. The answer is chosen
// at compile time, depending on whether V8_HOST_CAN_READ_UNALIGNED is
// defined.
27 bool RegExpMacroAssembler::CanReadUnaligned() {
28 #ifdef V8_HOST_CAN_READ_UNALIGNED
36 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
// Constructs the native macro assembler by forwarding |zone| to the
// RegExpMacroAssembler base-class constructor.
38 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
39 : RegExpMacroAssembler(zone) {
// Destructor of the native macro assembler.
43 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
// Unaligned reads are permitted only when FLAG_enable_unaligned_accesses is
// set and slow_safe() is false.
47 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
48 return FLAG_enable_unaligned_accesses && !slow_safe();
// Returns the address of the character at |start_index| within the character
// data of |subject|.
//
// Preconditions (checked with DCHECK): |subject| is an external or a
// sequential string, and |start_index| lies in [0, subject->length()].
// An index equal to the length is allowed, which yields the address just
// past the last character.
51 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
54 // Not just flat, but ultra flat.
55 DCHECK(subject->IsExternalString() || subject->IsSeqString());
56 DCHECK(start_index >= 0);
57 DCHECK(start_index <= subject->length());
58 if (subject->IsOneByteRepresentation()) {
// One-byte representation: fetch the character data from the external or
// the sequential string. The index is added to a byte pointer, so it is
// used directly as a byte offset.
60 if (StringShape(subject).IsExternal()) {
61 const uint8_t* data = ExternalAsciiString::cast(subject)->GetChars();
62 address = reinterpret_cast<const byte*>(data);
64 DCHECK(subject->IsSeqOneByteString());
65 const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
66 address = reinterpret_cast<const byte*>(data);
68 return address + start_index;
// Two-byte representation: the index is added to |data| before the cast to
// a byte pointer, so it advances in units of |data|'s element type
// (presumably 16-bit characters - TODO confirm against the declaration).
71 if (StringShape(subject).IsExternal()) {
72 data = ExternalTwoByteString::cast(subject)->GetChars();
74 DCHECK(subject->IsSeqTwoByteString());
75 data = SeqTwoByteString::cast(subject)->GetChars();
77 return reinterpret_cast<const byte*>(data + start_index);
// Runs |regexp_code| against |subject|, starting at character index
// |previous_index|.
//
// The subject is unwrapped to its underlying external or sequential string
// (cons strings via first(), sliced strings via parent() plus the slice
// offset). The byte range [input_start, input_end) of the characters to
// match is then computed and handed to Execute().
//
// Preconditions (checked with DCHECK): |subject| is flat and
// |previous_index| lies in [0, subject->length()].
81 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
82 Handle<Code> regexp_code,
83 Handle<String> subject,
85 int offsets_vector_length,
89 DCHECK(subject->IsFlat());
90 DCHECK(previous_index >= 0);
91 DCHECK(previous_index <= subject->length());
93 // No allocations before calling the regexp, but we can't use
94 // DisallowHeapAllocation, since regexps might be preempted, and another
95 // thread might do allocation anyway.
97 String* subject_ptr = *subject;
98 // Character offsets into string.
99 int start_offset = previous_index;
// Number of characters from the start offset to the end of the subject.
// Computed before unwrapping, so it refers to the original subject's length.
100 int char_length = subject_ptr->length() - start_offset;
101 int slice_offset = 0;
103 // The string has been flattened, so if it is a cons string it contains the
104 // full string in the first part.
105 if (StringShape(subject_ptr).IsCons()) {
106 DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
107 subject_ptr = ConsString::cast(subject_ptr)->first();
108 } else if (StringShape(subject_ptr).IsSliced()) {
// A sliced string is replaced by its parent; the slice offset is kept so
// that it can be added to the start offset below.
109 SlicedString* slice = SlicedString::cast(subject_ptr);
110 subject_ptr = slice->parent();
111 slice_offset = slice->offset();
113 // Ensure that an underlying string has the same ASCII-ness.
114 bool is_ascii = subject_ptr->IsOneByteRepresentation();
115 DCHECK(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
116 // String is now either Sequential or External
// Shift that converts a character count into a byte count: 0 for one-byte
// strings (1 byte per character), 1 otherwise (2 bytes per character).
117 int char_size_shift = is_ascii ? 0 : 1;
119 const byte* input_start =
120 StringCharacterPosition(subject_ptr, start_offset + slice_offset);
121 int byte_length = char_length << char_size_shift;
122 const byte* input_end = input_start + byte_length;
123 Result res = Execute(*regexp_code,
129 offsets_vector_length,
// Calls the generated regexp code at code->entry() and returns its integer
// result converted to Result.
//
// A RegExpStackScope is held for the duration of the call. If the generated
// code reports EXCEPTION while the isolate has no pending exception,
// isolate->StackOverflow() is called here before returning.
135 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
137 String* input, // This needs to be the unpacked (sliced, cons) string.
139 const byte* input_start,
140 const byte* input_end,
144 // Ensure that the minimum stack has been allocated.
145 RegExpStackScope stack_scope(isolate);
146 Address stack_base = stack_scope.stack()->stack_base();
149 int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
// RETRY is expected to be the smallest result value the generated code can
// return.
159 DCHECK(result >= RETRY);
161 if (result == EXCEPTION && !isolate->has_pending_exception()) {
162 // We detected a stack overflow (on the backtrack stack) in RegExp code,
163 // but haven't created the exception yet.
164 isolate->StackOverflow();
166 return static_cast<Result>(result);
// Lookup table with one entry per character code 0x00-0xff (32 rows of 8).
// An entry is 0xff when the character is a word character ('0'-'9',
// 'A'-'Z', '_', 'a'-'z') and 0x00 otherwise.
170 const byte NativeRegExpMacroAssembler::word_character_map[] = {
// 0x00 - 0x1f: no word characters.
171 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
172 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
173 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
174 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
// 0x20 - 0x3f: only the digits are word characters.
176 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
177 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
178 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
179 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
// 0x40 - 0x5f: upper-case letters and '_'; the leading 0x00 is '@'.
181 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
182 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
183 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
184 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
// 0x60 - 0x7f: lower-case letters; the leading 0x00 is '`'.
186 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
187 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
188 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
189 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
// 0x80 - 0xff: no word characters.
191 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
192 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
202 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
203 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
204 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
206 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
207 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
208 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
209 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
// Compares two buffers of two-byte (uc16) characters case-insensitively.
// The buffers start at |byte_offset1| and |byte_offset2| and are each
// |byte_length| bytes long. Characters are compared position by position;
// canonical forms come from the isolate's Ecma262Canonicalize mapping.
//
// |byte_length| must be even (checked with DCHECK).
213 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
214 Address byte_offset1,
215 Address byte_offset2,
218 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
219 isolate->regexp_macro_assembler_canonicalize();
220 // This function is not allowed to cause a garbage collection.
221 // A GC might move the calling generated code and invalidate the
222 // return address on the stack.
223 DCHECK(byte_length % 2 == 0);
224 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
225 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
// Two bytes per character, so the character count is half the byte count.
226 size_t length = byte_length >> 1;
228 for (size_t i = 0; i < length; i++) {
229 unibrow::uchar c1 = substring1[i];
230 unibrow::uchar c2 = substring2[i];
// Each one-element buffer is pre-filled with the raw character and passed
// to get() as the output buffer. Presumably get() leaves it untouched when
// the character has no mapping - TODO confirm against unibrow::Mapping.
232 unibrow::uchar s1[1] = { c1 };
233 canonicalize->get(c1, '\0', s1);
235 unibrow::uchar s2[1] = { c2 };
236 canonicalize->get(c2, '\0', s2);
237 if (s1[0] != s2[0]) {
// Grows the isolate's regexp stack by requesting twice its current capacity
// through EnsureCapacity().
//
// On the path visible here, the new base is stored through |stack_base| and
// the returned address is |stack_pointer| relocated to the new stack: it
// lies the same number of bytes below the new base as |stack_pointer| lay
// below the old one.
//
// Preconditions (checked with DCHECK): *stack_base equals the stack's
// current base, and |stack_pointer| lies at or below that base, no more
// than the current capacity away from it.
247 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
250 RegExpStack* regexp_stack = isolate->regexp_stack();
251 size_t size = regexp_stack->stack_capacity();
252 Address old_stack_base = regexp_stack->stack_base();
253 DCHECK(old_stack_base == *stack_base);
254 DCHECK(stack_pointer <= old_stack_base);
255 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
256 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
// EnsureCapacity() signals failure by returning NULL.
257 if (new_stack_base == NULL) {
260 *stack_base = new_stack_base;
// Number of bytes currently in use between the stack pointer and the base.
261 intptr_t stack_content_size = old_stack_base - stack_pointer;
262 return new_stack_base - stack_content_size;
265 #endif // V8_INTERPRETED_REGEXP
267 } } // namespace v8::internal