__ cmp(r2, Operand(r0, ASR, kSmiTagSize));
__ b(gt, &runtime);
+ // Reset offset for possibly sliced string.
+ __ mov(r9, Operand(0));
// subject: Subject string
// regexp_data: RegExp data (FixedArray)
// Check the representation and encoding of the subject string.
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
// First check for flat string.
- __ tst(r0, Operand(kIsNotStringMask | kStringRepresentationMask));
+ __ and_(r1, r0, Operand(kIsNotStringMask | kStringRepresentationMask), SetCC);
STATIC_ASSERT((kStringTag | kSeqStringTag) == 0);
__ b(eq, &seq_string);
// subject: Subject string
// regexp_data: RegExp data (FixedArray)
- // Check for flat cons string.
+ // Check for flat cons string or sliced string.
// A flat cons string is a cons string where the second part is the empty
// string. In that case the subject string is just the first part of the cons
// string. Also in this case the first part of the cons string is known to be
// a sequential string or an external string.
- STATIC_ASSERT(kExternalStringTag !=0);
- STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0);
- __ tst(r0, Operand(kIsNotStringMask | kExternalStringTag));
- __ b(ne, &runtime);
+ // In the case of a sliced string its offset has to be taken into account.
+ Label cons_string, check_encoding;
+ STATIC_ASSERT((kConsStringTag < kExternalStringTag));
+ STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
+ __ cmp(r1, Operand(kExternalStringTag));
+ __ b(lt, &cons_string);
+ __ b(eq, &runtime);
+
+ // String is sliced.
+ __ ldr(r9, FieldMemOperand(subject, SlicedString::kOffsetOffset));
+ __ mov(r9, Operand(r9, ASR, kSmiTagSize));
+ __ ldr(subject, FieldMemOperand(subject, SlicedString::kParentOffset));
+ // r9: offset of sliced string, untagged (smi tag shifted out above).
+ __ jmp(&check_encoding);
+ // String is a cons string, check whether it is flat.
+ __ bind(&cons_string);
__ ldr(r0, FieldMemOperand(subject, ConsString::kSecondOffset));
__ LoadRoot(r1, Heap::kEmptyStringRootIndex);
__ cmp(r0, r1);
__ b(ne, &runtime);
__ ldr(subject, FieldMemOperand(subject, ConsString::kFirstOffset));
+ // Is first part of cons or parent of slice a flat string?
+ __ bind(&check_encoding);
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
- // Is first part a flat string?
STATIC_ASSERT(kSeqStringTag == 0);
__ tst(r0, Operand(kStringRepresentationMask));
__ b(ne, &runtime);
-
__ bind(&seq_string);
// subject: Subject string
// regexp_data: RegExp data (FixedArray)
// For arguments 4 and 3 get string length, calculate start of string data and
// calculate the shift of the index (0 for ASCII and 1 for two byte).
- __ ldr(r0, FieldMemOperand(subject, String::kLengthOffset));
- __ mov(r0, Operand(r0, ASR, kSmiTagSize));
STATIC_ASSERT(SeqAsciiString::kHeaderSize == SeqTwoByteString::kHeaderSize);
- __ add(r9, subject, Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
+ __ add(r8, subject, Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
__ eor(r3, r3, Operand(1));
- // Argument 4 (r3): End of string data
- // Argument 3 (r2): Start of string data
+ // Load the original subject string from the previous stack frame; its
+ // length is read from it further below. We have to use fp for this, which
+ // points exactly to two pointer sizes below the previous sp. (Because
+ // creating a new stack frame pushes the previous fp onto the stack and moves
+ // up sp by 2 * kPointerSize.)
+ __ ldr(r0, MemOperand(fp, kSubjectOffset + 2 * kPointerSize));
+ // If slice offset is not 0, load the length from the original sliced string.
+ // Argument 4, r3: End of string data
+ // Argument 3, r2: Start of string data
+ // Prepare start and end index of the input.
+ __ add(r9, r8, Operand(r9, LSL, r3));
__ add(r2, r9, Operand(r1, LSL, r3));
- __ add(r3, r9, Operand(r0, LSL, r3));
+
+ __ ldr(r8, FieldMemOperand(r0, String::kLengthOffset));
+ __ mov(r8, Operand(r8, ASR, kSmiTagSize));
+ __ add(r3, r9, Operand(r8, LSL, r3));
// Argument 2 (r1): Previous index.
// Already there
// Argument 1 (r0): Subject string.
- __ mov(r0, subject);
+ // Already there
// Locate the code entry and call it.
__ add(r7, r7, Operand(Code::kHeaderSize - kHeapObjectTag));
// Check the result.
Label success;
- __ cmp(r0, Operand(NativeRegExpMacroAssembler::SUCCESS));
+ __ cmp(subject, Operand(NativeRegExpMacroAssembler::SUCCESS));
__ b(eq, &success);
Label failure;
- __ cmp(r0, Operand(NativeRegExpMacroAssembler::FAILURE));
+ __ cmp(subject, Operand(NativeRegExpMacroAssembler::FAILURE));
__ b(eq, &failure);
- __ cmp(r0, Operand(NativeRegExpMacroAssembler::EXCEPTION));
+ __ cmp(subject, Operand(NativeRegExpMacroAssembler::EXCEPTION));
// If not exception it can only be retry. Handle that in the runtime system.
__ b(ne, &runtime);
// Result must now be exception. If there is no pending exception already a
__ mov(r2, Operand(ExternalReference(Isolate::k_pending_exception_address,
isolate)));
__ ldr(r0, MemOperand(r2, 0));
- __ cmp(r0, r1);
+ __ cmp(subject, r1);
__ b(eq, &runtime);
__ str(r1, MemOperand(r2, 0)); // Clear pending exception.
// Check if the exception is a termination. If so, throw as uncatchable.
__ LoadRoot(ip, Heap::kTerminationExceptionRootIndex);
- __ cmp(r0, ip);
+ __ cmp(subject, ip);
Label termination_exception;
__ b(eq, &termination_exception);
- __ Throw(r0); // Expects thrown value in r0.
+ __ Throw(subject); // Expects thrown value in r0.
__ bind(&termination_exception);
__ ThrowUncatchable(TERMINATION, r0); // Expects thrown value in r0.
Label flat_string;
Label ascii_string;
Label got_char_code;
+ Label sliced_string;
// If the receiver is a smi trigger the non-string case.
__ JumpIfSmi(object_, receiver_not_string_);
__ b(eq, &flat_string);
// Handle non-flat strings.
- __ tst(result_, Operand(kIsConsStringMask));
+ __ and_(result_, result_, Operand(kStringRepresentationMask));
+ STATIC_ASSERT((kConsStringTag < kExternalStringTag));
+ STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
+ __ cmp(result_, Operand(kExternalStringTag));
+ __ b(gt, &sliced_string);
__ b(eq, &call_runtime_);
// ConsString.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
+ Label assure_seq_string;
__ ldr(result_, FieldMemOperand(object_, ConsString::kSecondOffset));
__ LoadRoot(ip, Heap::kEmptyStringRootIndex);
__ cmp(result_, Operand(ip));
__ b(ne, &call_runtime_);
// Get the first of the two strings and load its instance type.
__ ldr(object_, FieldMemOperand(object_, ConsString::kFirstOffset));
+ __ jmp(&assure_seq_string);
+
+ // SlicedString, unpack and add offset.
+ __ bind(&sliced_string);
+ __ ldr(result_, FieldMemOperand(object_, SlicedString::kOffsetOffset));
+ __ add(scratch_, scratch_, result_);
+ __ ldr(object_, FieldMemOperand(object_, SlicedString::kParentOffset));
+
+ // Assure that we are dealing with a sequential string. Go to runtime if not.
+ __ bind(&assure_seq_string);
__ ldr(result_, FieldMemOperand(object_, HeapObject::kMapOffset));
__ ldrb(result_, FieldMemOperand(result_, Map::kInstanceTypeOffset));
- // If the first cons component is also non-flat, then go to runtime.
+ // Check that the first part of the cons or the parent of the slice is a
+ // sequential string. Go to runtime otherwise.
STATIC_ASSERT(kSeqStringTag == 0);
__ tst(result_, Operand(kStringRepresentationMask));
__ b(ne, &call_runtime_);
// Check bounds and smi-ness.
Register to = r6;
Register from = r7;
+
+ if (FLAG_string_slices) {
+ __ nop(0); // Jumping as first instruction would crash the code generation.
+ __ jmp(&runtime);
+ }
+
__ Ldrd(to, from, MemOperand(sp, kToOffset));
STATIC_ASSERT(kFromOffset == kToOffset + 4);
STATIC_ASSERT(kSmiTag == 0);
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
+
// I.e., arithmetic shift right by one un-smi-tags.
__ mov(r2, Operand(to, ASR, 1), SetCC);
__ mov(r3, Operand(from, ASR, 1), SetCC, cc);
__ b(mi, &runtime); // From is negative.
// Both to and from are smis.
-
__ sub(r2, r2, Operand(r3), SetCC);
__ b(mi, &runtime); // Fail if from > to.
// Special handling of sub-strings of length 1 and 2. One character strings
// Builds the LStringCharCodeAt instruction for an HStringCharCodeAt node and
// attaches a pointer map and an environment to it.
// With this change both the string and the index operand are requested via
// UseTempRegister instead of UseRegister / UseRegisterOrConstant.
// NOTE(review): presumably this is required because the generated code now
// overwrites both registers while unwrapping cons and sliced strings (the
// first part / parent replaces the string, the slice offset is added to the
// index) -- confirm against the matching code generator.
LInstruction* LChunkBuilder::DoStringCharCodeAt(HStringCharCodeAt* instr) {
- LOperand* string = UseRegister(instr->string());
- LOperand* index = UseRegisterOrConstant(instr->index());
+ LOperand* string = UseTempRegister(instr->string());
+ LOperand* index = UseTempRegister(instr->index());
LStringCharCodeAt* result = new LStringCharCodeAt(string, index);
return AssignEnvironment(AssignPointerMap(DefineAsRegister(result)));
}
LStringCharCodeAt* instr_;
};
- Register scratch = scratch0();
Register string = ToRegister(instr->string());
- Register index = no_reg;
- int const_index = -1;
- if (instr->index()->IsConstantOperand()) {
- const_index = ToInteger32(LConstantOperand::cast(instr->index()));
- STATIC_ASSERT(String::kMaxLength <= Smi::kMaxValue);
- if (!Smi::IsValid(const_index)) {
- // Guaranteed to be out of bounds because of the assert above.
- // So the bounds check that must dominate this instruction must
- // have deoptimized already.
- if (FLAG_debug_code) {
- __ Abort("StringCharCodeAt: out of bounds index.");
- }
- // No code needs to be generated.
- return;
- }
- } else {
- index = ToRegister(instr->index());
- }
+ Register index = ToRegister(instr->index());
Register result = ToRegister(instr->result());
DeferredStringCharCodeAt* deferred =
new DeferredStringCharCodeAt(this, instr);
- Label flat_string, ascii_string, done;
-
// Fetch the instance type of the receiver into result register.
__ ldr(result, FieldMemOperand(string, HeapObject::kMapOffset));
__ ldrb(result, FieldMemOperand(result, Map::kInstanceTypeOffset));
- // We need special handling for non-flat strings.
- STATIC_ASSERT(kSeqStringTag == 0);
- __ tst(result, Operand(kStringRepresentationMask));
- __ b(eq, &flat_string);
-
- // Handle non-flat strings.
- __ tst(result, Operand(kIsConsStringMask));
- __ b(eq, deferred->entry());
-
- // ConsString.
+ // We need special handling for indirect strings.
+ Label check_sequential;
+ __ tst(result, Operand(kIsIndirectStringMask));
+ __ b(eq, &check_sequential);
+
+ // Dispatch on the indirect string shape: slice or cons.
+ Label cons_string;
+ const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag;
+ ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0);
+ __ tst(result, Operand(kSlicedNotConsMask));
+ __ b(eq, &cons_string);
+
+ // Handle slices.
+ Label indirect_string_loaded;
+ __ ldr(result, FieldMemOperand(string, SlicedString::kOffsetOffset));
+ __ add(index, index, Operand(result, ASR, kSmiTagSize));
+ __ ldr(string, FieldMemOperand(string, SlicedString::kParentOffset));
+ __ jmp(&indirect_string_loaded);
+
+ // Handle conses.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
- __ ldr(scratch, FieldMemOperand(string, ConsString::kSecondOffset));
+ __ bind(&cons_string);
+ __ ldr(result, FieldMemOperand(string, ConsString::kSecondOffset));
__ LoadRoot(ip, Heap::kEmptyStringRootIndex);
- __ cmp(scratch, ip);
+ __ cmp(result, ip);
__ b(ne, deferred->entry());
// Get the first of the two strings and load its instance type.
__ ldr(string, FieldMemOperand(string, ConsString::kFirstOffset));
+
+ __ bind(&indirect_string_loaded);
__ ldr(result, FieldMemOperand(string, HeapObject::kMapOffset));
__ ldrb(result, FieldMemOperand(result, Map::kInstanceTypeOffset));
- // If the first cons component is also non-flat, then go to runtime.
+
+ // Check whether the string is sequential. The only non-sequential
+ // shapes we support have just been unwrapped above.
+ __ bind(&check_sequential);
STATIC_ASSERT(kSeqStringTag == 0);
__ tst(result, Operand(kStringRepresentationMask));
__ b(ne, deferred->entry());
- // Check for 1-byte or 2-byte string.
- __ bind(&flat_string);
+ // Dispatch on the encoding: ASCII or two-byte.
+ Label ascii_string;
STATIC_ASSERT(kAsciiStringTag != 0);
__ tst(result, Operand(kStringEncodingMask));
__ b(ne, &ascii_string);
- // 2-byte string.
- // Load the 2-byte character code into the result register.
- STATIC_ASSERT(kSmiTag == 0 && kSmiTagSize == 1);
- if (instr->index()->IsConstantOperand()) {
- __ ldrh(result,
- FieldMemOperand(string,
- SeqTwoByteString::kHeaderSize + 2 * const_index));
- } else {
- __ add(scratch,
- string,
- Operand(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
- __ ldrh(result, MemOperand(scratch, index, LSL, 1));
- }
+ // Two-byte string.
+ // Load the two-byte character code into the result register.
+ Label done;
+ __ add(result,
+ string,
+ Operand(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
+ __ ldrh(result, MemOperand(result, index, LSL, 1));
__ jmp(&done);
// ASCII string.
// Load the byte into the result register.
__ bind(&ascii_string);
- if (instr->index()->IsConstantOperand()) {
- __ ldrb(result, FieldMemOperand(string,
- SeqAsciiString::kHeaderSize + const_index));
- } else {
- __ add(scratch,
- string,
- Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
- __ ldrb(result, MemOperand(scratch, index));
- }
+ __ add(result,
+ string,
+ Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
+ __ ldrb(result, MemOperand(result, index));
+
__ bind(&done);
__ bind(deferred->exit());
}
}
// Prepare for possible GC.
- HandleScope handles;
+ HandleScope handles(isolate);
Handle<Code> code_handle(re_code);
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
+
// Current string.
- bool is_ascii = subject->IsAsciiRepresentation();
+ bool is_ascii = subject->IsAsciiRepresentationUnderneath();
ASSERT(re_code->instruction_start() <= *return_address);
ASSERT(*return_address <=
return EXCEPTION;
}
+ Handle<String> subject_tmp = subject;
+ int slice_offset = 0;
+
+ // Extract the underlying string and the slice offset.
+ if (StringShape(*subject_tmp).IsCons()) {
+ subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
+ } else if (StringShape(*subject_tmp).IsSliced()) {
+ SlicedString* slice = SlicedString::cast(*subject_tmp);
+ subject_tmp = Handle<String>(slice->parent());
+ slice_offset = slice->offset();
+ }
+
// String might have changed.
- if (subject->IsAsciiRepresentation() != is_ascii) {
+ if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
// If we changed between an ASCII and an UC16 string, the specialized
// code cannot be used, and we need to restart regexp matching from
// scratch (including, potentially, compiling a new version of the code).
// be a sequential or external string with the same content.
// Update the start and end pointers in the stack frame to the current
// location (whether it has actually moved or not).
- ASSERT(StringShape(*subject).IsSequential() ||
- StringShape(*subject).IsExternal());
+ ASSERT(StringShape(*subject_tmp).IsSequential() ||
+ StringShape(*subject_tmp).IsExternal());
// The original start address of the characters to match.
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
// Find the current start address of the same character at the current string
// position.
int start_index = frame_entry<int>(re_frame, kStartIndex);
- const byte* new_address = StringCharacterPosition(*subject, start_index);
+ const byte* new_address = StringCharacterPosition(*subject_tmp,
+ start_index + slice_offset);
if (start_address != new_address) {
// If there is a difference, update the object pointer and start and end
// addresses in the RegExp stack frame to match the new value.
const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
- int byte_length = end_address - start_address;
+ int byte_length = static_cast<int>(end_address - start_address);
frame_entry<const String*>(re_frame, kInputString) = *subject;
frame_entry<const byte*>(re_frame, kInputStart) = new_address;
frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
// Flags for experimental implementation features.
DEFINE_bool(unbox_double_arrays, true, "automatically unbox arrays of doubles")
+DEFINE_bool(string_slices, false, "use string slices")
// Flags for Crankshaft.
#ifdef V8_TARGET_ARCH_MIPS
ASSERT(type != JS_GLOBAL_PROPERTY_CELL_TYPE);
if (type < FIRST_NONSTRING_TYPE) {
- // There are three string representations: sequential strings, cons
- // strings, and external strings. Only cons strings contain
- // non-map-word pointers to heap objects.
- return ((type & kStringRepresentationMask) == kConsStringTag)
+ // There are four string representations: sequential strings, external
+ // strings, cons strings, and sliced strings.
+ // Only the latter two contain non-map-word pointers to heap objects.
+ return ((type & kIsIndirectStringMask) == kIsIndirectStringTag)
? OLD_POINTER_SPACE
: OLD_DATA_SPACE;
} else {
&ObjectEvacuationStrategy<POINTER_OBJECT>::
template VisitSpecialized<ConsString::kSize>);
+ table_.Register(kVisitSlicedString,
+ &ObjectEvacuationStrategy<POINTER_OBJECT>::
+ template VisitSpecialized<SlicedString::kSize>);
+
table_.Register(kVisitSharedFunctionInfo,
&ObjectEvacuationStrategy<POINTER_OBJECT>::
template VisitSpecialized<SharedFunctionInfo::kSize>);
// If the resulting string is small make a flat string.
if (length < String::kMinNonFlatLength) {
+ // Note that neither of the two inputs can be a slice because:
+ STATIC_ASSERT(String::kMinNonFlatLength <= SlicedString::kMinLength);
ASSERT(first->IsFlat());
ASSERT(second->IsFlat());
if (is_ascii) {
// Make an attempt to flatten the buffer to reduce access time.
buffer = buffer->TryFlattenGetString();
+ // TODO(1626): For now slicing external strings is not supported. However,
+ // a flat cons string can have an external string as first part in some cases.
+ // Therefore we have to single out this case as well.
+ if (!FLAG_string_slices ||
+ (buffer->IsConsString() &&
+ (!buffer->IsFlat() ||
+ !ConsString::cast(buffer)->first()->IsSeqString())) ||
+ buffer->IsExternalString() ||
+ length < SlicedString::kMinLength ||
+ pretenure == TENURED) {
+ Object* result;
+ { MaybeObject* maybe_result = buffer->IsAsciiRepresentation()
+ ? AllocateRawAsciiString(length, pretenure)
+ : AllocateRawTwoByteString(length, pretenure);
+ if (!maybe_result->ToObject(&result)) return maybe_result;
+ }
+ String* string_result = String::cast(result);
+ // Copy the characters into the new object.
+ if (buffer->IsAsciiRepresentation()) {
+ ASSERT(string_result->IsAsciiRepresentation());
+ char* dest = SeqAsciiString::cast(string_result)->GetChars();
+ String::WriteToFlat(buffer, dest, start, end);
+ } else {
+ ASSERT(string_result->IsTwoByteRepresentation());
+ uc16* dest = SeqTwoByteString::cast(string_result)->GetChars();
+ String::WriteToFlat(buffer, dest, start, end);
+ }
+ return result;
+ }
+
+ ASSERT(buffer->IsFlat());
+ ASSERT(!buffer->IsExternalString());
+#if DEBUG
+ buffer->StringVerify();
+#endif
+
Object* result;
- { MaybeObject* maybe_result = buffer->IsAsciiRepresentation()
- ? AllocateRawAsciiString(length, pretenure )
- : AllocateRawTwoByteString(length, pretenure);
+ { Map* map = buffer->IsAsciiRepresentation()
+ ? sliced_ascii_string_map()
+ : sliced_string_map();
+ MaybeObject* maybe_result = Allocate(map, NEW_SPACE);
if (!maybe_result->ToObject(&result)) return maybe_result;
}
- String* string_result = String::cast(result);
- // Copy the characters into the new object.
- if (buffer->IsAsciiRepresentation()) {
- ASSERT(string_result->IsAsciiRepresentation());
- char* dest = SeqAsciiString::cast(string_result)->GetChars();
- String::WriteToFlat(buffer, dest, start, end);
+
+ AssertNoAllocation no_gc;
+ SlicedString* sliced_string = SlicedString::cast(result);
+ sliced_string->set_length(length);
+ sliced_string->set_hash_field(String::kEmptyHashField);
+ if (buffer->IsConsString()) {
+ ConsString* cons = ConsString::cast(buffer);
+ ASSERT(cons->second()->length() == 0);
+ sliced_string->set_parent(cons->first());
+ sliced_string->set_offset(start);
+ } else if (buffer->IsSlicedString()) {
+ // Prevent nesting sliced strings.
+ SlicedString* parent_slice = SlicedString::cast(buffer);
+ sliced_string->set_parent(parent_slice->parent());
+ sliced_string->set_offset(start + parent_slice->offset());
} else {
- ASSERT(string_result->IsTwoByteRepresentation());
- uc16* dest = SeqTwoByteString::cast(string_result)->GetChars();
- String::WriteToFlat(buffer, dest, start, end);
+ sliced_string->set_parent(buffer);
+ sliced_string->set_offset(start);
}
-
+ ASSERT(sliced_string->parent()->IsSeqString());
return result;
}
V(Map, symbol_map, SymbolMap) \
V(Map, cons_string_map, ConsStringMap) \
V(Map, cons_ascii_string_map, ConsAsciiStringMap) \
+ V(Map, sliced_string_map, SlicedStringMap) \
+ V(Map, sliced_ascii_string_map, SlicedAsciiStringMap) \
V(Map, ascii_symbol_map, AsciiSymbolMap) \
V(Map, cons_symbol_map, ConsSymbolMap) \
V(Map, cons_ascii_symbol_map, ConsAsciiSymbolMap) \
__ cmp(edx, Operand(eax));
__ j(greater, &runtime);
+ // Reset offset for possibly sliced string.
+ __ Set(edi, Immediate(0));
// ecx: RegExp data (FixedArray)
// Check the representation and encoding of the subject string.
Label seq_ascii_string, seq_two_byte_string, check_code;
__ and_(ebx,
kIsNotStringMask | kStringRepresentationMask | kStringEncodingMask);
STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0);
- __ j(zero, &seq_two_byte_string);
+ __ j(zero, &seq_two_byte_string, Label::kNear);
// Any other flat string must be a flat ascii string.
- __ test(Operand(ebx),
+ __ and_(Operand(ebx),
Immediate(kIsNotStringMask | kStringRepresentationMask));
- __ j(zero, &seq_ascii_string);
+ __ j(zero, &seq_ascii_string, Label::kNear);
- // Check for flat cons string.
+ // Check for flat cons string or sliced string.
// A flat cons string is a cons string where the second part is the empty
// string. In that case the subject string is just the first part of the cons
// string. Also in this case the first part of the cons string is known to be
// a sequential string or an external string.
- STATIC_ASSERT(kExternalStringTag != 0);
- STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0);
- __ test(Operand(ebx),
- Immediate(kIsNotStringMask | kExternalStringTag));
- __ j(not_zero, &runtime);
- // String is a cons string.
- __ mov(edx, FieldOperand(eax, ConsString::kSecondOffset));
- __ cmp(Operand(edx), factory->empty_string());
+ // In the case of a sliced string its offset has to be taken into account.
+ Label cons_string, check_encoding;
+ STATIC_ASSERT((kConsStringTag < kExternalStringTag));
+ STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
+ __ cmp(Operand(ebx), Immediate(kExternalStringTag));
+ __ j(less, &cons_string);
+ __ j(equal, &runtime);
+
+ // String is sliced.
+ __ mov(edi, FieldOperand(eax, SlicedString::kOffsetOffset));
+ __ mov(eax, FieldOperand(eax, SlicedString::kParentOffset));
+ // edi: offset of sliced string, smi-tagged.
+ // eax: parent string.
+ __ jmp(&check_encoding, Label::kNear);
+ // String is a cons string, check whether it is flat.
+ __ bind(&cons_string);
+ __ cmp(FieldOperand(eax, ConsString::kSecondOffset), factory->empty_string());
__ j(not_equal, &runtime);
__ mov(eax, FieldOperand(eax, ConsString::kFirstOffset));
+ __ bind(&check_encoding);
__ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
- // String is a cons string with empty second part.
- // eax: first part of cons string.
- // ebx: map of first part of cons string.
- // Is first part a flat two byte string?
+ // eax: first part of cons string or parent of sliced string.
+ // ebx: map of first part of cons string or map of parent of sliced string.
+ // Is first part of cons or parent of slice a flat two byte string?
__ test_b(FieldOperand(ebx, Map::kInstanceTypeOffset),
kStringRepresentationMask | kStringEncodingMask);
STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0);
- __ j(zero, &seq_two_byte_string);
+ __ j(zero, &seq_two_byte_string, Label::kNear);
// Any other flat string must be ascii.
__ test_b(FieldOperand(ebx, Map::kInstanceTypeOffset),
kStringRepresentationMask);
// eax: subject string (flat ascii)
// ecx: RegExp data (FixedArray)
__ mov(edx, FieldOperand(ecx, JSRegExp::kDataAsciiCodeOffset));
- __ Set(edi, Immediate(1)); // Type is ascii.
- __ jmp(&check_code);
+ __ Set(ecx, Immediate(1)); // Type is ascii.
+ __ jmp(&check_code, Label::kNear);
__ bind(&seq_two_byte_string);
// eax: subject string (flat two byte)
// ecx: RegExp data (FixedArray)
__ mov(edx, FieldOperand(ecx, JSRegExp::kDataUC16CodeOffset));
- __ Set(edi, Immediate(0)); // Type is two byte.
+ __ Set(ecx, Immediate(0)); // Type is two byte.
__ bind(&check_code);
// Check that the irregexp code has been generated for the actual string
// eax: subject string
// edx: code
- // edi: encoding of subject string (1 if ascii, 0 if two_byte);
+ // ecx: encoding of subject string (1 if ascii, 0 if two_byte);
// Load used arguments before starting to push arguments for call to native
// RegExp code to avoid handling changing stack height.
__ mov(ebx, Operand(esp, kPreviousIndexOffset));
// eax: subject string
// ebx: previous index
// edx: code
- // edi: encoding of subject string (1 if ascii 0 if two_byte);
+ // ecx: encoding of subject string (1 if ascii 0 if two_byte);
// All checks done. Now push arguments for native regexp code.
Counters* counters = masm->isolate()->counters();
__ IncrementCounter(counters->regexp_entry_native(), 1);
__ mov(Operand(esp, 6 * kPointerSize), Immediate(1));
// Argument 6: Start (high end) of backtracking stack memory area.
- __ mov(ecx, Operand::StaticVariable(address_of_regexp_stack_memory_address));
- __ add(ecx, Operand::StaticVariable(address_of_regexp_stack_memory_size));
- __ mov(Operand(esp, 5 * kPointerSize), ecx);
+ __ mov(esi, Operand::StaticVariable(address_of_regexp_stack_memory_address));
+ __ add(esi, Operand::StaticVariable(address_of_regexp_stack_memory_size));
+ __ mov(Operand(esp, 5 * kPointerSize), esi);
// Argument 5: static offsets vector buffer.
__ mov(Operand(esp, 4 * kPointerSize),
Immediate(ExternalReference::address_of_static_offsets_vector(
masm->isolate())));
+ // Argument 2: Previous index.
+ __ mov(Operand(esp, 1 * kPointerSize), ebx);
+
+ // Argument 1: Original subject string.
+ // The original subject is in the previous stack frame. Therefore we have to
+ // use ebp, which points exactly to one pointer size below the previous esp.
+ // (Because creating a new stack frame pushes the previous ebp onto the stack
+ // and thereby moves up esp by one kPointerSize.)
+ __ mov(esi, Operand(ebp, kSubjectOffset + kPointerSize));
+ __ mov(Operand(esp, 0 * kPointerSize), esi);
+
+ // esi: original subject string
+ // eax: underlying subject string
+ // ebx: previous index
+ // ecx: encoding of subject string (1 if ascii 0 if two_byte);
+ // edx: code
// Argument 4: End of string data
// Argument 3: Start of string data
+ // Prepare start and end index of the input.
+ // Load the length from the original subject string, i.e. from the slice
+ // itself if the subject is a sliced string.
+ __ mov(esi, FieldOperand(esi, String::kLengthOffset));
+ __ add(esi, Operand(edi)); // Calculate input end wrt offset.
+ __ SmiUntag(edi);
+ __ add(ebx, Operand(edi)); // Calculate input start wrt offset.
+
+ // ebx: start index of the input string
+ // esi: end index of the input string
Label setup_two_byte, setup_rest;
- __ test(edi, Operand(edi));
- __ mov(edi, FieldOperand(eax, String::kLengthOffset));
+ __ test(ecx, Operand(ecx));
__ j(zero, &setup_two_byte, Label::kNear);
- __ SmiUntag(edi);
- __ lea(ecx, FieldOperand(eax, edi, times_1, SeqAsciiString::kHeaderSize));
+ __ SmiUntag(esi);
+ __ lea(ecx, FieldOperand(eax, esi, times_1, SeqAsciiString::kHeaderSize));
__ mov(Operand(esp, 3 * kPointerSize), ecx); // Argument 4.
__ lea(ecx, FieldOperand(eax, ebx, times_1, SeqAsciiString::kHeaderSize));
__ mov(Operand(esp, 2 * kPointerSize), ecx); // Argument 3.
__ bind(&setup_two_byte);
STATIC_ASSERT(kSmiTag == 0);
- STATIC_ASSERT(kSmiTagSize == 1); // edi is smi (powered by 2).
- __ lea(ecx, FieldOperand(eax, edi, times_1, SeqTwoByteString::kHeaderSize));
+ STATIC_ASSERT(kSmiTagSize == 1); // esi is smi (powered by 2).
+ __ lea(ecx, FieldOperand(eax, esi, times_1, SeqTwoByteString::kHeaderSize));
__ mov(Operand(esp, 3 * kPointerSize), ecx); // Argument 4.
__ lea(ecx, FieldOperand(eax, ebx, times_2, SeqTwoByteString::kHeaderSize));
__ mov(Operand(esp, 2 * kPointerSize), ecx); // Argument 3.
__ bind(&setup_rest);
- // Argument 2: Previous index.
- __ mov(Operand(esp, 1 * kPointerSize), ebx);
-
- // Argument 1: Subject string.
- __ mov(Operand(esp, 0 * kPointerSize), eax);
-
// Locate the code entry and call it.
__ add(Operand(edx), Immediate(Code::kHeaderSize - kHeapObjectTag));
__ call(Operand(edx));
// by javascript code.
__ cmp(eax, factory->termination_exception());
Label throw_termination_exception;
- __ j(equal, &throw_termination_exception);
+ __ j(equal, &throw_termination_exception, Label::kNear);
// Handle normal exception by following handler chain.
__ Throw(eax);
Label flat_string;
Label ascii_string;
Label got_char_code;
+ Label sliced_string;
// If the receiver is a smi trigger the non-string case.
STATIC_ASSERT(kSmiTag == 0);
__ j(zero, &flat_string);
// Handle non-flat strings.
- __ test(result_, Immediate(kIsConsStringMask));
- __ j(zero, &call_runtime_);
+ __ and_(result_, kStringRepresentationMask);
+ STATIC_ASSERT((kConsStringTag < kExternalStringTag));
+ STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
+ __ cmp(result_, kExternalStringTag);
+ __ j(greater, &sliced_string, Label::kNear);
+ __ j(equal, &call_runtime_);
// ConsString.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
+ Label assure_seq_string;
__ cmp(FieldOperand(object_, ConsString::kSecondOffset),
Immediate(masm->isolate()->factory()->empty_string()));
__ j(not_equal, &call_runtime_);
// Get the first of the two strings and load its instance type.
__ mov(object_, FieldOperand(object_, ConsString::kFirstOffset));
+ __ jmp(&assure_seq_string, Label::kNear);
+
+ // SlicedString, unpack and add offset.
+ __ bind(&sliced_string);
+ __ add(scratch_, FieldOperand(object_, SlicedString::kOffsetOffset));
+ __ mov(object_, FieldOperand(object_, SlicedString::kParentOffset));
+
+ // Assure that we are dealing with a sequential string. Go to runtime if not.
+ __ bind(&assure_seq_string);
__ mov(result_, FieldOperand(object_, HeapObject::kMapOffset));
__ movzx_b(result_, FieldOperand(result_, Map::kInstanceTypeOffset));
- // If the first cons component is also non-flat, then go to runtime.
STATIC_ASSERT(kSeqStringTag == 0);
__ test(result_, Immediate(kStringRepresentationMask));
__ j(not_zero, &call_runtime_);
+ __ jmp(&flat_string, Label::kNear);
// Check for 1-byte or 2-byte string.
__ bind(&flat_string);
STATIC_ASSERT(kAsciiStringTag != 0);
__ test(result_, Immediate(kStringEncodingMask));
- __ j(not_zero, &ascii_string);
+ __ j(not_zero, &ascii_string, Label::kNear);
// 2-byte string.
// Load the 2-byte character code into the result register.
__ movzx_w(result_, FieldOperand(object_,
scratch_, times_1, // Scratch is smi-tagged.
SeqTwoByteString::kHeaderSize));
- __ jmp(&got_char_code);
+ __ jmp(&got_char_code, Label::kNear);
// ASCII string.
// Load the byte into the result register.
__ and_(ecx, kStringRepresentationMask);
__ cmp(ecx, kExternalStringTag);
__ j(equal, &string_add_runtime);
+ // We cannot encounter sliced strings here since:
+ STATIC_ASSERT(SlicedString::kMinLength >= String::kMinNonFlatLength);
// Now check if both strings are ascii strings.
// eax: first string
// ebx: length of resulting flat string as a smi
void SubStringStub::Generate(MacroAssembler* masm) {
Label runtime;
+ if (FLAG_string_slices) {
+ __ jmp(&runtime);
+ }
// Stack frame on entry.
// esp[0]: return address
// esp[4]: to
};
Register string = ToRegister(instr->string());
- Register index = no_reg;
- int const_index = -1;
- if (instr->index()->IsConstantOperand()) {
- const_index = ToInteger32(LConstantOperand::cast(instr->index()));
- STATIC_ASSERT(String::kMaxLength <= Smi::kMaxValue);
- if (!Smi::IsValid(const_index)) {
- // Guaranteed to be out of bounds because of the assert above.
- // So the bounds check that must dominate this instruction must
- // have deoptimized already.
- if (FLAG_debug_code) {
- __ Abort("StringCharCodeAt: out of bounds index.");
- }
- // No code needs to be generated.
- return;
- }
- } else {
- index = ToRegister(instr->index());
- }
+ Register index = ToRegister(instr->index());
Register result = ToRegister(instr->result());
DeferredStringCharCodeAt* deferred =
new DeferredStringCharCodeAt(this, instr);
- Label flat_string, ascii_string, done;
-
// Fetch the instance type of the receiver into result register.
__ mov(result, FieldOperand(string, HeapObject::kMapOffset));
__ movzx_b(result, FieldOperand(result, Map::kInstanceTypeOffset));
- // We need special handling for non-flat strings.
- STATIC_ASSERT(kSeqStringTag == 0);
- __ test(result, Immediate(kStringRepresentationMask));
- __ j(zero, &flat_string, Label::kNear);
-
- // Handle non-flat strings.
- __ test(result, Immediate(kIsConsStringMask));
- __ j(zero, deferred->entry());
+ // We need special handling for indirect strings.
+ Label check_sequential;
+ __ test(result, Immediate(kIsIndirectStringMask));
+ __ j(zero, &check_sequential, Label::kNear);
+
+ // Dispatch on the indirect string shape: slice or cons.
+ Label cons_string;
+ const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag;
+ ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0);
+ __ test(result, Immediate(kSlicedNotConsMask));
+ __ j(zero, &cons_string, Label::kNear);
+
+ // Handle slices.
+ Label indirect_string_loaded;
+ __ mov(result, FieldOperand(string, SlicedString::kOffsetOffset));
+ __ SmiUntag(result);
+ __ add(index, Operand(result));
+ __ mov(string, FieldOperand(string, SlicedString::kParentOffset));
+ __ jmp(&indirect_string_loaded, Label::kNear);
- // ConsString.
+ // Handle conses.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
+ __ bind(&cons_string);
__ cmp(FieldOperand(string, ConsString::kSecondOffset),
Immediate(factory()->empty_string()));
__ j(not_equal, deferred->entry());
- // Get the first of the two strings and load its instance type.
__ mov(string, FieldOperand(string, ConsString::kFirstOffset));
+
+ __ bind(&indirect_string_loaded);
__ mov(result, FieldOperand(string, HeapObject::kMapOffset));
__ movzx_b(result, FieldOperand(result, Map::kInstanceTypeOffset));
- // If the first cons component is also non-flat, then go to runtime.
+
+ // Check whether the string is sequential. The only non-sequential
+ // shapes we support have just been unwrapped above.
+ __ bind(&check_sequential);
STATIC_ASSERT(kSeqStringTag == 0);
__ test(result, Immediate(kStringRepresentationMask));
__ j(not_zero, deferred->entry());
- // Check for ASCII or two-byte string.
- __ bind(&flat_string);
+ // Dispatch on the encoding: ASCII or two-byte.
+ Label ascii_string;
STATIC_ASSERT(kAsciiStringTag != 0);
__ test(result, Immediate(kStringEncodingMask));
__ j(not_zero, &ascii_string, Label::kNear);
// Two-byte string.
// Load the two-byte character code into the result register.
+ Label done;
STATIC_ASSERT(kSmiTag == 0 && kSmiTagSize == 1);
- if (instr->index()->IsConstantOperand()) {
- __ movzx_w(result,
- FieldOperand(string,
- SeqTwoByteString::kHeaderSize +
- (kUC16Size * const_index)));
- } else {
- __ movzx_w(result, FieldOperand(string,
- index,
- times_2,
- SeqTwoByteString::kHeaderSize));
- }
+ __ movzx_w(result, FieldOperand(string,
+ index,
+ times_2,
+ SeqTwoByteString::kHeaderSize));
__ jmp(&done, Label::kNear);
// ASCII string.
// Load the byte into the result register.
__ bind(&ascii_string);
- if (instr->index()->IsConstantOperand()) {
- __ movzx_b(result, FieldOperand(string,
- SeqAsciiString::kHeaderSize + const_index));
- } else {
- __ movzx_b(result, FieldOperand(string,
- index,
- times_1,
- SeqAsciiString::kHeaderSize));
- }
+ __ movzx_b(result, FieldOperand(string,
+ index,
+ times_1,
+ SeqAsciiString::kHeaderSize));
__ bind(&done);
__ bind(deferred->exit());
}
LInstruction* LChunkBuilder::DoStringCharCodeAt(HStringCharCodeAt* instr) {
- LOperand* string = UseRegister(instr->string());
- LOperand* index = UseRegisterOrConstant(instr->index());
+ LOperand* string = UseTempRegister(instr->string());  // Codegen overwrites it while unwrapping.
+ LOperand* index = UseTempRegister(instr->index());  // Codegen adds the slice offset in place.
LOperand* context = UseAny(instr->context());
LStringCharCodeAt* result = new LStringCharCodeAt(context, string, index);
return AssignEnvironment(AssignPointerMap(DefineAsRegister(result)));
}
// Prepare for possible GC.
- HandleScope handles;
+ HandleScope handles(isolate);
Handle<Code> code_handle(re_code);
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
+
// Current string.
- bool is_ascii = subject->IsAsciiRepresentation();
+ bool is_ascii = subject->IsAsciiRepresentationUnderneath();
ASSERT(re_code->instruction_start() <= *return_address);
ASSERT(*return_address <=
return EXCEPTION;
}
+ Handle<String> subject_tmp = subject;
+ int slice_offset = 0;
+
+ // Extract the underlying string and the slice offset.
+ if (StringShape(*subject_tmp).IsCons()) {
+ subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
+ } else if (StringShape(*subject_tmp).IsSliced()) {
+ SlicedString* slice = SlicedString::cast(*subject_tmp);
+ subject_tmp = Handle<String>(slice->parent());
+ slice_offset = slice->offset();
+ }
+
// String might have changed.
- if (subject->IsAsciiRepresentation() != is_ascii) {
+ if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
// If we changed between an ASCII and an UC16 string, the specialized
// code cannot be used, and we need to restart regexp matching from
// scratch (including, potentially, compiling a new version of the code).
// be a sequential or external string with the same content.
// Update the start and end pointers in the stack frame to the current
// location (whether it has actually moved or not).
- ASSERT(StringShape(*subject).IsSequential() ||
- StringShape(*subject).IsExternal());
+ ASSERT(StringShape(*subject_tmp).IsSequential() ||
+ StringShape(*subject_tmp).IsExternal());
// The original start address of the characters to match.
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
// Find the current start address of the same character at the current string
// position.
int start_index = frame_entry<int>(re_frame, kStartIndex);
- const byte* new_address = StringCharacterPosition(*subject, start_index);
+ const byte* new_address = StringCharacterPosition(*subject_tmp,
+ start_index + slice_offset);
if (start_address != new_address) {
// If there is a difference, update the object pointer and start and end
// addresses in the RegExp stack frame to match the new value.
const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
- int byte_length = end_address - start_address;
+ int byte_length = static_cast<int>(end_address - start_address);
frame_entry<const String*>(re_frame, kInputString) = *subject;
frame_entry<const byte*>(re_frame, kInputStart) = new_address;
frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
if (!subject->IsFlat()) FlattenString(subject);
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
- // Extract flattened substrings of cons strings before determining asciiness.
String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
+ ASSERT(StringShape(needle).IsSequential());
int needle_len = needle->length();
ASSERT(needle->IsFlat());
JSRegExp::Flags flags = re->GetFlags();
Handle<String> pattern(re->Pattern());
- if (!pattern->IsFlat()) {
- FlattenString(pattern);
- }
-
+ if (!pattern->IsFlat()) FlattenString(pattern);
RegExpCompileData compile_data;
FlatStringReader reader(isolate, pattern);
if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
Handle<String> subject) {
- if (!subject->IsFlat()) {
- FlattenString(subject);
- }
+ if (!subject->IsFlat()) FlattenString(subject);
+
// Check the asciiness of the underlying storage.
- bool is_ascii;
- {
- AssertNoAllocation no_gc;
- String* sequential_string = *subject;
- if (subject->IsConsString()) {
- sequential_string = ConsString::cast(*subject)->first();
- }
- is_ascii = sequential_string->IsAsciiRepresentation();
- }
- if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
- return -1;
- }
+ bool is_ascii = subject->IsAsciiRepresentationUnderneath();
+ if (!EnsureCompiledIrregexp(regexp, is_ascii)) return -1;
+
#ifdef V8_INTERPRETED_REGEXP
// Byte-code regexp needs space allocated for all its registers.
return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
ASSERT(index <= subject->length());
ASSERT(subject->IsFlat());
- // A flat ASCII string might have a two-byte first part.
- if (subject->IsConsString()) {
- subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
- }
+ bool is_ascii = subject->IsAsciiRepresentationUnderneath();
#ifndef V8_INTERPRETED_REGEXP
ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
- bool is_ascii = subject->IsAsciiRepresentation();
EnsureCompiledIrregexp(regexp, is_ascii);
Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
NativeRegExpMacroAssembler::Result res =
// being internal and external, and even between being ASCII and UC16,
// but the characters are always the same).
IrregexpPrepare(regexp, subject);
+ is_ascii = subject->IsAsciiRepresentationUnderneath();
} while (true);
UNREACHABLE();
return RE_EXCEPTION;
#else // V8_INTERPRETED_REGEXP
ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
- bool is_ascii = subject->IsAsciiRepresentation();
// We must have done EnsureCompiledIrregexp, so we can get the number of
// registers.
int* register_vector = output.start();
ConsString::BodyDescriptor,
void>::Visit);
+ table_.Register(kVisitSlicedString,
+ &FixedBodyVisitor<StaticMarkingVisitor,
+ SlicedString::BodyDescriptor,
+ void>::Visit);
table_.Register(kVisitFixedArray,
&FlexibleBodyVisitor<StaticMarkingVisitor,
if (IsSymbol()) {
CHECK(!HEAP->InNewSpace(this));
}
+ if (IsConsString()) {
+ ConsString::cast(this)->ConsStringVerify();
+ } else if (IsSlicedString()) {
+ SlicedString::cast(this)->SlicedStringVerify();
+ }
+}
+
+
+// Heap-verification check for a cons string: both parts must be strings, the
+// total length must reach String::kMinNonFlatLength, and a flat cons must
+// wrap a sequential or external first part.
+void ConsString::ConsStringVerify() {
+  CHECK(this->first()->IsString());
+  CHECK(this->second() == GetHeap()->empty_string() ||
+        this->second()->IsString());
+  CHECK(this->length() >= String::kMinNonFlatLength);
+  // Nothing further to check for a cons that has not been flattened.
+  if (!this->IsFlat()) return;
+  // A flat cons can only be created by String::SlowTryFlatten.
+  // Afterwards, the first part may be externalized.
+  CHECK(this->first()->IsSeqString() || this->first()->IsExternalString());
+}
+
+
+void SlicedString::SlicedStringVerify() {
+ CHECK(!this->parent()->IsConsString());  // The parent is never a cons string.
+ CHECK(!this->parent()->IsSlicedString());  // Slices are never nested.
+ CHECK(this->length() >= SlicedString::kMinLength);  // A slice is never shorter than kMinLength.
}
bool Object::IsConsString() {
- if (!this->IsHeapObject()) return false;
- uint32_t type = HeapObject::cast(this)->map()->instance_type();
- return (type & (kIsNotStringMask | kStringRepresentationMask)) ==
- (kStringTag | kConsStringTag);
+  // Only a string has a string shape to inspect; any other object is
+  // not a cons string.
+  return IsString() && StringShape(String::cast(this)).IsCons();
+}
+
+
+bool Object::IsSlicedString() {
+  // Only a string has a string shape to inspect; any other object is
+  // not a sliced string.
+  return IsString() && StringShape(String::cast(this)).IsSliced();
}
}
+// Returns whether the characters of this flat string are stored as ASCII,
+// looking through a cons or sliced wrapper to the string underneath.
+bool String::IsAsciiRepresentationUnderneath() {
+  STATIC_ASSERT(kIsIndirectStringTag != 0);
+  STATIC_ASSERT((kIsIndirectStringMask & kStringEncodingMask) == 0);
+  ASSERT(IsFlat());
+  const uint32_t indirect_and_encoding =
+      map()->instance_type() & (kIsIndirectStringMask | kStringEncodingMask);
+  // Indirect bit clear: the encoding bit of this string is authoritative.
+  if (indirect_and_encoding == kAsciiStringTag) return true;
+  if (indirect_and_encoding == kTwoByteStringTag) return false;
+  // Cons or sliced string. Need to go deeper.
+  return GetUnderlying()->IsAsciiRepresentation();
+}
+
+
+// Returns whether the characters of this flat string are stored as two-byte,
+// looking through a cons or sliced wrapper to the string underneath.
+bool String::IsTwoByteRepresentationUnderneath() {
+  STATIC_ASSERT(kIsIndirectStringTag != 0);
+  STATIC_ASSERT((kIsIndirectStringMask & kStringEncodingMask) == 0);
+  ASSERT(IsFlat());
+  const uint32_t indirect_and_encoding =
+      map()->instance_type() & (kIsIndirectStringMask | kStringEncodingMask);
+  // Indirect bit clear: the encoding bit of this string is authoritative.
+  if (indirect_and_encoding == kAsciiStringTag) return false;
+  if (indirect_and_encoding == kTwoByteStringTag) return true;
+  // Cons or sliced string. Need to go deeper.
+  return GetUnderlying()->IsTwoByteRepresentation();
+}
+
+
bool String::HasOnlyAsciiChars() {
uint32_t type = map()->instance_type();
return (type & kStringEncodingMask) == kAsciiStringTag ||
}
+// True when the representation bits of the instance type select a slice.
+bool StringShape::IsSliced() {
+  const uint32_t representation = type_ & kStringRepresentationMask;
+  return representation == kSlicedStringTag;
+}
+
+
+// True when the indirect-string bit is set in the instance type, i.e. for
+// the cons and sliced representations.
+bool StringShape::IsIndirect() {
+  const uint32_t indirect_bits = type_ & kIsIndirectStringMask;
+  return indirect_bits == kIsIndirectStringTag;
+}
+
+
bool StringShape::IsExternal() {
return (type_ & kStringRepresentationMask) == kExternalStringTag;
}
CAST_ACCESSOR(SeqString)
CAST_ACCESSOR(SeqAsciiString)
CAST_ACCESSOR(SeqTwoByteString)
+CAST_ACCESSOR(SlicedString)
CAST_ACCESSOR(ConsString)
CAST_ACCESSOR(ExternalString)
CAST_ACCESSOR(ExternalAsciiString)
MaybeObject* String::TryFlatten(PretenureFlag pretenure) {
if (!StringShape(this).IsCons()) return this;
ConsString* cons = ConsString::cast(this);
- if (cons->second()->length() == 0) return cons->first();
+ if (cons->IsFlat()) return cons->first();  // Flat cons: the second part is empty.
return SlowTryFlatten(pretenure);
}
String* String::TryFlattenGetString(PretenureFlag pretenure) {
MaybeObject* flat = TryFlatten(pretenure);
Object* successfully_flattened;
- if (flat->ToObject(&successfully_flattened)) {
- return String::cast(successfully_flattened);
- }
- return this;
+  // Hand back the receiver itself when TryFlatten did not yield an object.
+  return flat->ToObject(&successfully_flattened)
+      ? String::cast(successfully_flattened)
+      : this;
}
return ExternalAsciiString::cast(this)->ExternalAsciiStringGet(index);
case kExternalStringTag | kTwoByteStringTag:
return ExternalTwoByteString::cast(this)->ExternalTwoByteStringGet(index);
+ case kSlicedStringTag | kAsciiStringTag:
+ case kSlicedStringTag | kTwoByteStringTag:
+ return SlicedString::cast(this)->SlicedStringGet(index);
default:
break;
}
bool String::IsFlat() {
- switch (StringShape(this).representation_tag()) {
- case kConsStringTag: {
- String* second = ConsString::cast(this)->second();
- // Only flattened strings have second part empty.
- return second->length() == 0;
- }
- default:
- return true;
- }
+  // Every non-cons string counts as flat; a cons string is flat exactly
+  // when its second part is empty.
+  return !StringShape(this).IsCons() ||
+      ConsString::cast(this)->second()->length() == 0;
+}
+
+
+// Returns the string wrapped by a flat indirect (cons or sliced) string.
+String* String::GetUnderlying() {
+  // Giving direct access to underlying string only makes sense if the
+  // wrapping string is already flattened.
+  ASSERT(this->IsFlat());
+  ASSERT(StringShape(this).IsIndirect());
+  // The first part of a cons and the parent of a slice live at the same
+  // offset, so a single read serves both shapes.
+  STATIC_ASSERT(ConsString::kFirstOffset == SlicedString::kParentOffset);
+  Object* underlying = READ_FIELD(this, SlicedString::kParentOffset);
+  return String::cast(underlying);
}
}
+// Reads the parent string stored at kParentOffset.
+String* SlicedString::parent() {
+  Object* raw_parent = READ_FIELD(this, kParentOffset);
+  return String::cast(raw_parent);
+}
+
+
+void SlicedString::set_parent(String* parent) {
+ ASSERT(parent->IsSeqString());  // Only sequential parents are accepted.
+ WRITE_FIELD(this, kParentOffset, parent);  // NOTE(review): no write barrier here; confirm callers.
+}
+
+
+SMI_ACCESSORS(SlicedString, offset, kOffsetOffset)
+
+
String* ConsString::first() {
return String::cast(READ_FIELD(this, kFirstOffset));
}
return kVisitConsString;
}
+ case kSlicedStringTag:
+ return kVisitSlicedString;
+
case kExternalStringTag:
return GetVisitorIdForSize(kVisitDataObject,
kVisitDataObjectGeneric,
kVisitStructGeneric,
kVisitConsString,
+ kVisitSlicedString,
kVisitOddball,
kVisitCode,
kVisitMap,
ConsString::BodyDescriptor,
int>::Visit);
+ table_.Register(kVisitSlicedString,
+ &FixedBodyVisitor<StaticVisitor,
+ SlicedString::BodyDescriptor,
+ int>::Visit);
+
table_.Register(kVisitFixedArray,
&FlexibleBodyVisitor<StaticVisitor,
FixedArray::BodyDescriptor,
case kConsStringTag:
ConsString::BodyDescriptor::IterateBody(this, v);
break;
+ case kSlicedStringTag:
+ SlicedString::BodyDescriptor::IterateBody(this, v);
+ break;
case kExternalStringTag:
if ((type & kStringEncodingMask) == kAsciiStringTag) {
reinterpret_cast<ExternalAsciiString*>(this)->
int length = this->length();
StringShape shape(this);
String* string = this;
+ int offset = 0;
if (shape.representation_tag() == kConsStringTag) {
ConsString* cons = ConsString::cast(string);
if (cons->second()->length() != 0) {
string = cons->first();
shape = StringShape(string);
}
+ if (shape.representation_tag() == kSlicedStringTag) {
+ SlicedString* slice = SlicedString::cast(string);
+ offset = slice->offset();
+ string = slice->parent();
+ shape = StringShape(string);
+ ASSERT(shape.representation_tag() != kConsStringTag &&
+ shape.representation_tag() != kSlicedStringTag);
+ }
if (shape.encoding_tag() == kAsciiStringTag) {
const char* start;
if (shape.representation_tag() == kSeqStringTag) {
} else {
start = ExternalAsciiString::cast(string)->resource()->data();
}
- return FlatContent(Vector<const char>(start, length));
+ return FlatContent(Vector<const char>(start + offset, length));
} else {
ASSERT(shape.encoding_tag() == kTwoByteStringTag);
const uc16* start;
} else {
start = ExternalTwoByteString::cast(string)->resource()->data();
}
- return FlatContent(Vector<const uc16>(start, length));
+ return FlatContent(Vector<const uc16>(start + offset, length));
}
}
const uc16* String::GetTwoByteData(unsigned start) {
- ASSERT(!IsAsciiRepresentation());
+ ASSERT(!IsAsciiRepresentationUnderneath());
switch (StringShape(this).representation_tag()) {
case kSeqStringTag:
return SeqTwoByteString::cast(this)->SeqTwoByteStringGetData(start);
case kExternalStringTag:
return ExternalTwoByteString::cast(this)->
ExternalTwoByteStringGetData(start);
+ case kSlicedStringTag: {
+ SlicedString* slice = SlicedString::cast(this);
+ return slice->parent()->GetTwoByteData(start + slice->offset());
+ }
case kConsStringTag:
UNREACHABLE();
return NULL;
max_chars);
return rbb->util_buffer;
}
+ case kSlicedStringTag:
+ return SlicedString::cast(input)->SlicedStringReadBlock(rbb,
+ offset_ptr,
+ max_chars);
default:
break;
}
max_chars);
}
return;
+ case kSlicedStringTag:
+ SlicedString::cast(input)->SlicedStringReadBlockIntoBuffer(rbb,
+ offset_ptr,
+ max_chars);
+ return;
default:
break;
}
}
+// Reads the character at |index| of the slice by translating the index into
+// the parent string.
+uint16_t SlicedString::SlicedStringGet(int index) {
+  const int parent_index = offset() + index;
+  return parent()->Get(parent_index);
+}
+
+
+// Reads a block of characters from the parent string. *offset_ptr is
+// shifted by the slice offset for the duration of the parent read and
+// shifted back afterwards.
+const unibrow::byte* SlicedString::SlicedStringReadBlock(
+    ReadBlockBuffer* buffer, unsigned* offset_ptr, unsigned chars) {
+  const unsigned slice_start = this->offset();
+  *offset_ptr += slice_start;
+  const unibrow::byte* block =
+      String::ReadBlock(String::cast(parent()), buffer, offset_ptr, chars);
+  *offset_ptr -= slice_start;
+  return block;
+}
+
+
+// Reads characters from the parent string into |buffer|. *offset_ptr is
+// shifted by the slice offset for the duration of the parent read and
+// shifted back afterwards.
+void SlicedString::SlicedStringReadBlockIntoBuffer(
+    ReadBlockBuffer* buffer, unsigned* offset_ptr, unsigned chars) {
+  const unsigned slice_start = this->offset();
+  *offset_ptr += slice_start;
+  String::ReadBlockIntoBuffer(String::cast(parent()),
+                              buffer,
+                              offset_ptr,
+                              chars);
+  *offset_ptr -= slice_start;
+}
+
template <typename sinkchar>
void String::WriteToFlat(String* src,
sinkchar* sink,
}
break;
}
+ case kAsciiStringTag | kSlicedStringTag:
+ case kTwoByteStringTag | kSlicedStringTag: {
+ SlicedString* slice = SlicedString::cast(source);
+ unsigned offset = slice->offset();
+ WriteToFlat(slice->parent(), sink, from + offset, to + offset);
+ return;
+ }
}
}
}
// - SeqString
// - SeqAsciiString
// - SeqTwoByteString
+// - SlicedString
// - ConsString
// - ExternalString
// - ExternalAsciiString
V(ASCII_STRING_TYPE) \
V(CONS_STRING_TYPE) \
V(CONS_ASCII_STRING_TYPE) \
+ V(SLICED_STRING_TYPE) \
V(EXTERNAL_STRING_TYPE) \
V(EXTERNAL_STRING_WITH_ASCII_DATA_TYPE) \
V(EXTERNAL_ASCII_STRING_TYPE) \
ConsString::kSize, \
cons_ascii_string, \
ConsAsciiString) \
+ V(SLICED_STRING_TYPE, \
+ SlicedString::kSize, \
+ sliced_string, \
+ SlicedString) \
+ V(SLICED_ASCII_STRING_TYPE, \
+ SlicedString::kSize, \
+ sliced_ascii_string, \
+ SlicedAsciiString) \
V(EXTERNAL_STRING_TYPE, \
ExternalTwoByteString::kSize, \
external_string, \
enum StringRepresentationTag {
kSeqStringTag = 0x0,
kConsStringTag = 0x1,
- kExternalStringTag = 0x2
+ kExternalStringTag = 0x2,
+ kSlicedStringTag = 0x3  // Bit 0 is set for both cons and sliced strings.
};
-const uint32_t kIsConsStringMask = 0x1;
+const uint32_t kIsIndirectStringMask = 0x1;
+const uint32_t kIsIndirectStringTag = 0x1;
+STATIC_ASSERT((kSeqStringTag & kIsIndirectStringMask) == 0);
+STATIC_ASSERT((kExternalStringTag & kIsIndirectStringMask) == 0);
+STATIC_ASSERT(
+ (kConsStringTag & kIsIndirectStringMask) == kIsIndirectStringTag);
+STATIC_ASSERT(
+ (kSlicedStringTag & kIsIndirectStringMask) == kIsIndirectStringTag);
// If bit 7 is clear, then bit 3 indicates whether this two-byte
// string actually contains ascii data.
ASCII_STRING_TYPE = kAsciiStringTag | kSeqStringTag,
CONS_STRING_TYPE = kTwoByteStringTag | kConsStringTag,
CONS_ASCII_STRING_TYPE = kAsciiStringTag | kConsStringTag,
+ SLICED_STRING_TYPE = kTwoByteStringTag | kSlicedStringTag,
+ SLICED_ASCII_STRING_TYPE = kAsciiStringTag | kSlicedStringTag,
EXTERNAL_STRING_TYPE = kTwoByteStringTag | kExternalStringTag,
EXTERNAL_STRING_WITH_ASCII_DATA_TYPE =
kTwoByteStringTag | kExternalStringTag | kAsciiDataHintTag,
V(SeqString) \
V(ExternalString) \
V(ConsString) \
+ V(SlicedString) \
V(ExternalTwoByteString) \
V(ExternalAsciiString) \
V(SeqTwoByteString) \
inline bool IsSequential();
inline bool IsExternal();
inline bool IsCons();
+ inline bool IsSliced();
+ inline bool IsIndirect();
inline bool IsExternalAscii();
inline bool IsExternalTwoByte();
inline bool IsSequentialAscii();
inline uint32_t hash_field();
inline void set_hash_field(uint32_t value);
inline bool IsAsciiRepresentation();
inline bool IsTwoByteRepresentation();

+ // Cons and slices have an encoding flag that may not represent the actual
+ // encoding of the underlying string. This is taken into account here.
+ // Requires: this->IsFlat()
+ inline bool IsAsciiRepresentationUnderneath();
+ inline bool IsTwoByteRepresentationUnderneath();
+
// Returns whether this string has only ASCII chars, i.e. all of them can
// be ASCII encoded. This might be the case even if the string is
// two-byte. Such strings may appear when the embedder prefers
// two-byte external representations even for ASCII data.
//
// NOTE: this should be considered only a hint. False negatives are
// possible.
inline bool HasOnlyAsciiChars();
// kind.
FlatContent GetFlatContent();
+ // Returns the parent of a sliced string or first part of a flat cons string.
+ // Requires: StringShape(this).IsIndirect() && this->IsFlat()
+ inline String* GetUnderlying();
+
// Mark the string as an undetectable object. It only applies to
// ascii and two byte string types.
bool MarkAsUndetectable();
typedef FixedBodyDescriptor<kFirstOffset, kSecondOffset + kPointerSize, kSize>
BodyDescriptor;
+#ifdef DEBUG
+ void ConsStringVerify();
+#endif
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ConsString);
};
+// The Sliced String class describes strings that are substrings of another
+// sequential string. The motivation is to save time and memory when creating
+// a substring. A Sliced String is described as a pointer to the parent,
+// the offset from the start of the parent string and the length. Using
+// a Sliced String therefore requires unpacking of the parent string and
+// adding the offset to the start address. Substrings of a Sliced String
+// are not nested: the double indirection is simplified when such a
+// substring is created.
+// Currently missing features are:
+// - handling externalized parent strings
+// - external strings as parent
+// - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
+class SlicedString: public String {
+ public:
+  // Casting.
+  static inline SlicedString* cast(Object* obj);
+
+  // Accessors for the parent string and for the offset of the slice from
+  // the start of the parent.
+  inline String* parent();
+  inline void set_parent(String* parent);
+  inline int offset();
+  inline void set_offset(int offset);
+
+  // Minimum length for a sliced string.
+  static const int kMinLength = 13;
+
+  // Layout description: the parent pointer is followed by the offset.
+  static const int kParentOffset = POINTER_SIZE_ALIGN(String::kSize);
+  static const int kOffsetOffset = kParentOffset + kPointerSize;
+  static const int kSize = kOffsetOffset + kPointerSize;
+
+  typedef FixedBodyDescriptor<kParentOffset,
+                              kOffsetOffset + kPointerSize, kSize>
+          BodyDescriptor;
+
+  // Dispatched behavior.
+  uint16_t SlicedStringGet(int index);
+
+  // Support for StringInputBuffer
+  inline const unibrow::byte* SlicedStringReadBlock(ReadBlockBuffer* buffer,
+                                                    unsigned* offset_ptr,
+                                                    unsigned chars);
+  inline void SlicedStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
+                                              unsigned* offset_ptr,
+                                              unsigned chars);
+
+#ifdef DEBUG
+  // Heap-verification check for this slice.
+  void SlicedStringVerify();
+#endif
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString);
+};
+
+
// The ExternalString class describes string values that are backed by
// a string resource that lies outside the V8 heap. ExternalStrings
// consist of the length field common to all strings, a pointer to the
String* subject_ptr = *subject;
// Character offsets into string.
int start_offset = previous_index;
- int end_offset = subject_ptr->length();
+ int char_length = subject_ptr->length() - start_offset;
+ int slice_offset = 0;
- // The string has been flattened, so it it is a cons string it contains the
+ // The string has been flattened, so if it is a cons string it contains the
// full string in the first part.
if (StringShape(subject_ptr).IsCons()) {
ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
subject_ptr = ConsString::cast(subject_ptr)->first();
+ } else if (StringShape(subject_ptr).IsSliced()) {
+ SlicedString* slice = SlicedString::cast(subject_ptr);
+ subject_ptr = slice->parent();
+ slice_offset = slice->offset();
}
// Ensure that an underlying string has the same ascii-ness.
bool is_ascii = subject_ptr->IsAsciiRepresentation();
ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
// String is now either Sequential or External
int char_size_shift = is_ascii ? 0 : 1;
- int char_length = end_offset - start_offset;
const byte* input_start =
- StringCharacterPosition(subject_ptr, start_offset);
+ StringCharacterPosition(subject_ptr, start_offset + slice_offset);
int byte_length = char_length << char_size_shift;
const byte* input_end = input_start + byte_length;
Result res = Execute(*regexp_code,
- subject_ptr,
+ *subject,
start_offset,
input_start,
input_end,
NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
Code* code,
- String* input,
+ String* input, // The original subject string, even if it is sliced or cons.
int start_offset,
const byte* input_start,
const byte* input_end,
HandleScope handles(isolate);
CONVERT_ARG_CHECKED(String, subject, 1);
- if (!subject->IsFlat()) { FlattenString(subject); }
+ if (!subject->IsFlat()) FlattenString(subject);
CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_CHECKED(JSArray, last_match_info, 2);
CONVERT_ARG_CHECKED(JSArray, result_array, 3);
__ testq(kScratchRegister, kScratchRegister);
__ j(zero, &runtime);
-
// Check that the first argument is a JSRegExp object.
__ movq(rax, Operand(rsp, kJSRegExpOffset));
__ JumpIfSmi(rax, &runtime);
__ cmpl(rdx, rdi);
__ j(greater, &runtime);
+ // Reset offset for possibly sliced string.
+ __ Set(r14, 0);
// rax: RegExp data (FixedArray)
// Check the representation and encoding of the subject string.
Label seq_ascii_string, seq_two_byte_string, check_code;
__ movq(rdi, Operand(rsp, kSubjectOffset));
+ // Make a copy of the original subject string.
+ __ movq(r15, rdi);
__ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset));
__ movzxbl(rbx, FieldOperand(rbx, Map::kInstanceTypeOffset));
// First check for flat two byte string.
STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0);
__ j(zero, &seq_two_byte_string, Label::kNear);
// Any other flat string must be a flat ascii string.
- __ testb(rbx, Immediate(kIsNotStringMask | kStringRepresentationMask));
+ __ andb(rbx, Immediate(kIsNotStringMask | kStringRepresentationMask));
__ j(zero, &seq_ascii_string, Label::kNear);
- // Check for flat cons string.
+ // Check for flat cons string or sliced string.
// A flat cons string is a cons string where the second part is the empty
// string. In that case the subject string is just the first part of the cons
// string. Also in this case the first part of the cons string is known to be
// a sequential string or an external string.
- STATIC_ASSERT(kExternalStringTag !=0);
- STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0);
- __ testb(rbx, Immediate(kIsNotStringMask | kExternalStringTag));
- __ j(not_zero, &runtime);
- // String is a cons string.
+ // In the case of a sliced string its offset has to be taken into account.
+ Label cons_string, check_encoding;
+ STATIC_ASSERT((kConsStringTag < kExternalStringTag));
+ STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
+ __ cmpq(rbx, Immediate(kExternalStringTag));
+ __ j(less, &cons_string, Label::kNear);
+ __ j(equal, &runtime);
+
+ // String is sliced.
+ __ SmiToInteger32(r14, FieldOperand(rdi, SlicedString::kOffsetOffset));
+ __ movq(rdi, FieldOperand(rdi, SlicedString::kParentOffset));
+ // r14: slice offset
+ // r15: original subject string
+ // rdi: parent string
+ __ jmp(&check_encoding, Label::kNear);
+ // String is a cons string, check whether it is flat.
+ __ bind(&cons_string);
__ CompareRoot(FieldOperand(rdi, ConsString::kSecondOffset),
Heap::kEmptyStringRootIndex);
__ j(not_equal, &runtime);
__ movq(rdi, FieldOperand(rdi, ConsString::kFirstOffset));
+ // rdi: first part of cons string or parent of sliced string.
+ // rbx: map of first part of cons string or map of parent of sliced string.
+ // Is first part of cons or parent of slice a flat two byte string?
+ __ bind(&check_encoding);
__ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset));
- // String is a cons string with empty second part.
- // rdi: first part of cons string.
- // rbx: map of first part of cons string.
- // Is first part a flat two byte string?
__ testb(FieldOperand(rbx, Map::kInstanceTypeOffset),
Immediate(kStringRepresentationMask | kStringEncodingMask));
STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0);
// rbx: previous index
// rcx: encoding of subject string (1 if ascii 0 if two_byte);
// r11: code
+ // r14: slice offset
+ // r15: original subject string
+
+ // Argument 2: Previous index.
+ __ movq(arg2, rbx);
// Argument 4: End of string data
// Argument 3: Start of string data
- Label setup_two_byte, setup_rest;
+ Label setup_two_byte, setup_rest, got_length, length_not_from_slice;
+ // Prepare start and end index of the input.
+ // Load the length from the original sliced string if that is the case.
+ __ addq(rbx, r14);
+ __ SmiToInteger32(arg3, FieldOperand(r15, String::kLengthOffset));
+ __ addq(r14, arg3); // Using arg3 as scratch.
+
+ // rbx: start index of the input
+ // r14: end index of the input
+ // r15: original subject string
__ testb(rcx, rcx); // Last use of rcx as encoding of subject string.
__ j(zero, &setup_two_byte, Label::kNear);
- __ SmiToInteger32(rcx, FieldOperand(rdi, String::kLengthOffset));
- __ lea(arg4, FieldOperand(rdi, rcx, times_1, SeqAsciiString::kHeaderSize));
+ __ lea(arg4, FieldOperand(rdi, r14, times_1, SeqAsciiString::kHeaderSize));
__ lea(arg3, FieldOperand(rdi, rbx, times_1, SeqAsciiString::kHeaderSize));
__ jmp(&setup_rest, Label::kNear);
__ bind(&setup_two_byte);
- __ SmiToInteger32(rcx, FieldOperand(rdi, String::kLengthOffset));
- __ lea(arg4, FieldOperand(rdi, rcx, times_2, SeqTwoByteString::kHeaderSize));
+ __ lea(arg4, FieldOperand(rdi, r14, times_2, SeqTwoByteString::kHeaderSize));
__ lea(arg3, FieldOperand(rdi, rbx, times_2, SeqTwoByteString::kHeaderSize));
-
__ bind(&setup_rest);
- // Argument 2: Previous index.
- __ movq(arg2, rbx);
- // Argument 1: Subject string.
-#ifdef _WIN64
- __ movq(arg1, rdi);
-#else
- // Already there in AMD64 calling convention.
- ASSERT(arg1.is(rdi));
- USE(arg1);
-#endif
+ // Argument 1: Original subject string.
+ // r15 still holds the copy of the original subject string that was made
+ // before a cons or sliced string was unwrapped into rdi.
+ __ movq(arg1, r15);
// Locate the code entry and call it.
__ addq(r11, Immediate(Code::kHeaderSize - kHeapObjectTag));
Label flat_string;
Label ascii_string;
Label got_char_code;
+ Label sliced_string;
// If the receiver is a smi trigger the non-string case.
__ JumpIfSmi(object_, receiver_not_string_);
__ j(zero, &flat_string);
// Handle non-flat strings.
- __ testb(result_, Immediate(kIsConsStringMask));
- __ j(zero, &call_runtime_);
+ __ and_(result_, Immediate(kStringRepresentationMask));
+ STATIC_ASSERT((kConsStringTag < kExternalStringTag));
+ STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
+ __ cmpb(result_, Immediate(kExternalStringTag));
+ __ j(greater, &sliced_string);
+ __ j(equal, &call_runtime_);
// ConsString.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
+ Label assure_seq_string;
__ CompareRoot(FieldOperand(object_, ConsString::kSecondOffset),
Heap::kEmptyStringRootIndex);
__ j(not_equal, &call_runtime_);
// Get the first of the two strings and load its instance type.
__ movq(object_, FieldOperand(object_, ConsString::kFirstOffset));
+ __ jmp(&assure_seq_string, Label::kNear);
+
+ // SlicedString, unpack and add offset.
+ __ bind(&sliced_string);
+ __ addq(scratch_, FieldOperand(object_, SlicedString::kOffsetOffset));
+ __ movq(object_, FieldOperand(object_, SlicedString::kParentOffset));
+
+ __ bind(&assure_seq_string);
__ movq(result_, FieldOperand(object_, HeapObject::kMapOffset));
__ movzxbl(result_, FieldOperand(result_, Map::kInstanceTypeOffset));
// If the first cons component is also non-flat, then go to runtime.
STATIC_ASSERT(kSeqStringTag == 0);
__ testb(result_, Immediate(kStringRepresentationMask));
__ j(not_zero, &call_runtime_);
+ __ jmp(&flat_string);
// Check for 1-byte or 2-byte string.
__ bind(&flat_string);
__ and_(rcx, Immediate(kStringRepresentationMask));
__ cmpl(rcx, Immediate(kExternalStringTag));
__ j(equal, &string_add_runtime);
+ // We cannot encounter sliced strings here since:
+ STATIC_ASSERT(SlicedString::kMinLength >= String::kMinNonFlatLength);
// Now check if both strings are ascii strings.
// rax: first string
// rbx: length of resulting flat string
void SubStringStub::Generate(MacroAssembler* masm) {
Label runtime;
+ if (FLAG_string_slices) {
+ __ jmp(&runtime);
+ }
// Stack frame on entry.
// rsp[0]: return address
// rsp[8]: to
};
Register string = ToRegister(instr->string());
- Register index = no_reg;
- int const_index = -1;
- if (instr->index()->IsConstantOperand()) {
- const_index = ToInteger32(LConstantOperand::cast(instr->index()));
- STATIC_ASSERT(String::kMaxLength <= Smi::kMaxValue);
- if (!Smi::IsValid(const_index)) {
- // Guaranteed to be out of bounds because of the assert above.
- // So the bounds check that must dominate this instruction must
- // have deoptimized already.
- if (FLAG_debug_code) {
- __ Abort("StringCharCodeAt: out of bounds index.");
- }
- // No code needs to be generated.
- return;
- }
- } else {
- index = ToRegister(instr->index());
- }
+ Register index = ToRegister(instr->index());
Register result = ToRegister(instr->result());
DeferredStringCharCodeAt* deferred =
new DeferredStringCharCodeAt(this, instr);
- Label flat_string, ascii_string, done;
-
// Fetch the instance type of the receiver into result register.
__ movq(result, FieldOperand(string, HeapObject::kMapOffset));
__ movzxbl(result, FieldOperand(result, Map::kInstanceTypeOffset));
- // We need special handling for non-sequential strings.
- STATIC_ASSERT(kSeqStringTag == 0);
- __ testb(result, Immediate(kStringRepresentationMask));
- __ j(zero, &flat_string, Label::kNear);
-
- // Handle cons strings and go to deferred code for the rest.
- __ testb(result, Immediate(kIsConsStringMask));
- __ j(zero, deferred->entry());
-
- // ConsString.
+ // We need special handling for indirect strings.
+ Label check_sequential;
+ __ testb(result, Immediate(kIsIndirectStringMask));
+ __ j(zero, &check_sequential, Label::kNear);
+
+ // Dispatch on the indirect string shape: slice or cons.
+ Label cons_string;
+ const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag;
+ ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0);
+ __ testb(result, Immediate(kSlicedNotConsMask));
+ __ j(zero, &cons_string, Label::kNear);
+
+ // Handle slices.
+ Label indirect_string_loaded;
+ __ SmiToInteger32(result, FieldOperand(string, SlicedString::kOffsetOffset));
+ __ addq(index, result);
+ __ movq(string, FieldOperand(string, SlicedString::kParentOffset));
+ __ jmp(&indirect_string_loaded, Label::kNear);
+
+ // Handle conses.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
+ __ bind(&cons_string);
__ CompareRoot(FieldOperand(string, ConsString::kSecondOffset),
Heap::kEmptyStringRootIndex);
__ j(not_equal, deferred->entry());
- // Get the first of the two strings and load its instance type.
__ movq(string, FieldOperand(string, ConsString::kFirstOffset));
+
+ __ bind(&indirect_string_loaded);
__ movq(result, FieldOperand(string, HeapObject::kMapOffset));
__ movzxbl(result, FieldOperand(result, Map::kInstanceTypeOffset));
- // If the first cons component is also non-flat, then go to runtime.
+
+ // Check whether the string is sequential. The only non-sequential
+ // shapes we support have just been unwrapped above.
+ __ bind(&check_sequential);
STATIC_ASSERT(kSeqStringTag == 0);
__ testb(result, Immediate(kStringRepresentationMask));
__ j(not_zero, deferred->entry());
- // Check for ASCII or two-byte string.
- __ bind(&flat_string);
+ // Dispatch on the encoding: ASCII or two-byte.
+ Label ascii_string;
STATIC_ASSERT(kAsciiStringTag != 0);
__ testb(result, Immediate(kStringEncodingMask));
__ j(not_zero, &ascii_string, Label::kNear);
// Two-byte string.
// Load the two-byte character code into the result register.
+ Label done;
STATIC_ASSERT(kSmiTag == 0 && kSmiTagSize == 1);
- if (instr->index()->IsConstantOperand()) {
- __ movzxwl(result,
- FieldOperand(string,
- SeqTwoByteString::kHeaderSize +
- (kUC16Size * const_index)));
- } else {
- __ movzxwl(result, FieldOperand(string,
- index,
- times_2,
- SeqTwoByteString::kHeaderSize));
- }
+ __ movzxwl(result, FieldOperand(string,
+ index,
+ times_2,
+ SeqTwoByteString::kHeaderSize));
__ jmp(&done, Label::kNear);
// ASCII string.
// Load the byte into the result register.
__ bind(&ascii_string);
- if (instr->index()->IsConstantOperand()) {
- __ movzxbl(result, FieldOperand(string,
- SeqAsciiString::kHeaderSize + const_index));
- } else {
- __ movzxbl(result, FieldOperand(string,
- index,
- times_1,
- SeqAsciiString::kHeaderSize));
- }
+ __ movzxbl(result, FieldOperand(string,
+ index,
+ times_1,
+ SeqAsciiString::kHeaderSize));
__ bind(&done);
__ bind(deferred->exit());
}
LInstruction* LChunkBuilder::DoStringCharCodeAt(HStringCharCodeAt* instr) {
- LOperand* string = UseRegister(instr->string());
- LOperand* index = UseRegisterOrConstant(instr->index());
+ // Both operands are requested via UseTempRegister: the code generated for
+ // this instruction overwrites both registers (the string register is
+ // redirected to the slice parent or the first part of a flat cons string,
+ // and the slice offset is added to the index register).
+ LOperand* string = UseTempRegister(instr->string());
+ LOperand* index = UseTempRegister(instr->index());
LStringCharCodeAt* result = new LStringCharCodeAt(string, index);
return AssignEnvironment(AssignPointerMap(DefineAsRegister(result)));
}
}
// Prepare for possible GC.
- HandleScope handles;
+ HandleScope handles(isolate);
Handle<Code> code_handle(re_code);
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
+
// Current string.
- bool is_ascii = subject->IsAsciiRepresentation();
+ bool is_ascii = subject->IsAsciiRepresentationUnderneath();
ASSERT(re_code->instruction_start() <= *return_address);
ASSERT(*return_address <=
MaybeObject* result = Execution::HandleStackGuardInterrupt();
if (*code_handle != re_code) { // Return address no longer valid
- intptr_t delta = *code_handle - re_code;
+ int delta = *code_handle - re_code;
// Overwrite the return address on the stack.
*return_address += delta;
}
return EXCEPTION;
}
+ Handle<String> subject_tmp = subject;
+ int slice_offset = 0;
+
+ // Extract the underlying string and the slice offset.
+ if (StringShape(*subject_tmp).IsCons()) {
+ subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
+ } else if (StringShape(*subject_tmp).IsSliced()) {
+ SlicedString* slice = SlicedString::cast(*subject_tmp);
+ subject_tmp = Handle<String>(slice->parent());
+ slice_offset = slice->offset();
+ }
+
// String might have changed.
- if (subject->IsAsciiRepresentation() != is_ascii) {
+ if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
// If we changed between an ASCII and an UC16 string, the specialized
// code cannot be used, and we need to restart regexp matching from
// scratch (including, potentially, compiling a new version of the code).
// be a sequential or external string with the same content.
// Update the start and end pointers in the stack frame to the current
// location (whether it has actually moved or not).
- ASSERT(StringShape(*subject).IsSequential() ||
- StringShape(*subject).IsExternal());
+ ASSERT(StringShape(*subject_tmp).IsSequential() ||
+ StringShape(*subject_tmp).IsExternal());
// The original start address of the characters to match.
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
// Find the current start address of the same character at the current string
// position.
int start_index = frame_entry<int>(re_frame, kStartIndex);
- const byte* new_address = StringCharacterPosition(*subject, start_index);
+ const byte* new_address = StringCharacterPosition(*subject_tmp,
+ start_index + slice_offset);
if (start_address != new_address) {
// If there is a difference, update the object pointer and start and end
f = v8::Local<v8::Function>::Cast(env->Global()->Get(v8::String::New("f")));
g = v8::Local<v8::Function>::Cast(env->Global()->Get(v8::String::New("g")));
- // Chesk that a break point was hit when the script was run.
+ // Check that a break point was hit when the script was run.
CHECK_EQ(1, break_point_hit_count);
CHECK_EQ(0, StrLength(last_function_hit));
" return 0;"
"};"
"test()";
- CHECK_EQ(0,
- v8::Script::Compile(v8::String::New(source))->Run()->Int32Value());
+ CHECK_EQ(0, CompileRun(source)->Int32Value());
}
}
}
}
+
+
+// Checks that taking a substring of a non-flat cons string flattens the cons
+// string and yields a sliced string whose parent is the sequential first
+// part of the flattened cons string, not the cons string itself.
+TEST(SliceFromCons) {
+ FLAG_string_slices = true;
+ InitializeVM();
+ v8::HandleScope scope;
+ Handle<String> string =
+ FACTORY->NewStringFromAscii(CStrVector("parentparentparent"));
+ // Concatenating the 18-character string with itself gives a 36-character
+ // cons string that is not flat yet.
+ Handle<String> parent = FACTORY->NewConsString(string, string);
+ CHECK(parent->IsConsString());
+ CHECK(!parent->IsFlat());
+ Handle<String> slice = FACTORY->NewSubString(parent, 1, 25);
+ // After slicing, the original string becomes a flat cons.
+ CHECK(parent->IsFlat());
+ CHECK(slice->IsSlicedString());
+ // The slice has to point at the first part of the flat cons string rather
+ // than at the cons string.
+ CHECK_EQ(SlicedString::cast(*slice)->parent(),
+ ConsString::cast(*parent)->first());
+ CHECK(SlicedString::cast(*slice)->parent()->IsSeqString());
+ CHECK(slice->IsFlat());
+}
+
+
+// Checks that taking the full range of a string never produces a sliced
+// string, whether requested from script or through FACTORY->NewSubString,
+// while a shorter range of the same string does.
+TEST(TrivialSlice) {
+ // This tests whether a slice that contains the entire parent string
+ // actually creates a new string (it should not).
+ FLAG_string_slices = true;
+ InitializeVM();
+ HandleScope scope;
+ v8::Local<v8::Value> result;
+ Handle<String> string;
+ const char* init = "var str = 'abcdefghijklmnopqrstuvwxyz';";
+ // str has 26 characters, so slice(0,26) covers all of it.
+ const char* check = "str.slice(0,26)";
+ const char* crosscheck = "str.slice(1,25)";
+
+ CompileRun(init);
+
+ // Full-range slice requested from script.
+ result = CompileRun(check);
+ CHECK(result->IsString());
+ string = v8::Utils::OpenHandle(v8::String::Cast(*result));
+ CHECK(!string->IsSlicedString());
+
+ // Full-range substring requested through the factory.
+ string = FACTORY->NewSubString(string, 0, 26);
+ CHECK(!string->IsSlicedString());
+ // A range that drops the first and the last character is expected to be
+ // represented as a sliced string with the right content.
+ result = CompileRun(crosscheck);
+ CHECK(result->IsString());
+ string = v8::Utils::OpenHandle(v8::String::Cast(*result));
+ CHECK(string->IsSlicedString());
+ CHECK_EQ("bcdefghijklmnopqrstuvwxy", *(string->ToCString()));
+}
--- /dev/null
+// Copyright 2009 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Flags: --string-slices
+
+//assertEquals('345"12345 6"1234567"123',
+// '12345""12345 6""1234567""1234'.slice(2,-1).replace(/""/g, '"'));
+
+// Match an anchored pattern against a substring taken from the middle of a
+// long string (starting at the "regexp:" marker) and check the first capture.
+// The check is repeated 1000 times.
+var foo = "lsdfj sldkfj sdklfj læsdfjl sdkfjlsdk fjsdl fjsdljskdj flsj flsdkj flskd regexp: /foobar/\nldkfj sdlkfj sdkl";
+for(var i = 0; i < 1000; i++) {
+ assertTrue(/^([a-z]+): (.*)/.test(foo.substring(foo.indexOf("regexp:"))));
+ assertEquals("regexp", RegExp.$1, "RegExp.$1");
+}
+
+// A pattern with many capture groups, tested twice against the concatenation
+// of a slice with itself.
+var re = /^(((N({)?)|(R)|(U)|(V)|(B)|(H)|(n((n)|(r)|(v)|(h))?)|(r(r)?)|(v)|(b((n)|(b))?)|(h))|((Y)|(A)|(E)|(o(u)?)|(p(u)?)|(q(u)?)|(s)|(t)|(u)|(w)|(x(u)?)|(y)|(z)|(a((T)|(A)|(L))?)|(c)|(e)|(f(u)?)|(g(u)?)|(i)|(j)|(l)|(m(u)?)))+/;
+var r = new RegExp(re)
+var str = "_Avtnennan gunzvmu pubExnY nEvln vaTxh rmuhguhaTxnY_".slice(1,-1);
+str = str + str;
+assertTrue(r.test(str));
+assertTrue(r.test(str));
+// Recompile one RegExp object with different patterns and apply it to
+// freshly created slices ("_axyb_".slice(1,-1) is "axyb").
+var re = /x/;
+assertEquals("a.yb", "_axyb_".slice(1,-1).replace(re, "."));
+re.compile("y");
+assertEquals("ax.b", "_axyb_".slice(1,-1).replace(re, "."));
+re.compile("(x)");
+assertEquals(["x", "x"], re.exec("_axyb_".slice(1,-1)));
+re.compile("(y)");
+assertEquals(["y", "y"], re.exec("_axyb_".slice(1,-1)));
+
+// Back-references on a slice (a) and on the concatenation of a string
+// literal with that slice (b). Each pattern is executed twice per string.
+for(var i = 0; i < 100; i++) {
+ var a = "aaaaaaaaaaaaaaaaaaaaaaaabbaacabbabaaaaabbaaaabbac".slice(24,-1);
+ var b = "bbaacabbabaaaaabbaaaabba" + a;
+ // The first time, the cons string will be flattened and handled by the
+ // runtime system.
+ assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(b));
+ // The second time, the cons string is already flattened and will be
+ // handled by generated code.
+ assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(b));
+ assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(a));
+ assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(a));
+}
+
+// Patterns anchored at both ends, applied to slices: c and d are cut at both
+// ends (d contains a non-ASCII character), e is cut only at the end.
+var c = "ABCDEFGHIJKLMN".slice(2,-2);
+var d = "ABCDEF\u1234GHIJKLMN".slice(2,-2);
+var e = "ABCDEFGHIJKLMN".slice(0,-2);
+assertTrue(/^C.*L$/.test(c));
+assertTrue(/^C.*L$/.test(c));
+assertTrue(/^C.*L$/.test(d));
+assertTrue(/^C.*L$/.test(d));
+assertTrue(/^A\w{10}L$/.test(e));
+assertTrue(/^A\w{10}L$/.test(e));
+
+// split() on a slice: the first and the last piece end at the slice
+// boundaries, not at the boundaries of the string the slice was taken from.
+var e = "qui-opIasd-fghjklzx-cvbn-mqwer-tyuio-pasdf-ghIjkl-zx".slice(6,-6);
+var e_split = e.split("-");
+assertEquals(e_split[0], "Iasd");
+assertEquals(e_split[1], "fghjklzx");
+assertEquals(e_split[6], "ghI");
--- /dev/null
+// Copyright 2008 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Flags: --string-slices --expose-externalize-string
+
+// A start argument that is omitted or converts to 0 yields the whole string.
+var s = 'abcdefghijklmn';
+assertEquals(s, s.substr());
+assertEquals(s, s.substr(0));
+assertEquals(s, s.substr('0'));
+assertEquals(s, s.substr(void 0));
+assertEquals(s, s.substr(null));
+assertEquals(s, s.substr(false));
+assertEquals(s, s.substr(0.9));
+assertEquals(s, s.substr({ valueOf: function() { return 0; } }));
+assertEquals(s, s.substr({ toString: function() { return '0'; } }));
+
+// A start argument that converts to 1 drops the first character.
+var s1 = s.substring(1);
+assertEquals(s1, s.substr(1));
+assertEquals(s1, s.substr('1'));
+assertEquals(s1, s.substr(true));
+assertEquals(s1, s.substr(1.1));
+assertEquals(s1, s.substr({ valueOf: function() { return 1; } }));
+assertEquals(s1, s.substr({ toString: function() { return '1'; } }));
+
+
+// Negative start values count back from the end of the string; values that
+// reach before the beginning yield the string from its start.
+assertEquals(s.substring(s.length - 1), s.substr(-1));
+assertEquals(s.substring(s.length - 1), s.substr(-1.2));
+assertEquals(s.substring(s.length - 1), s.substr(-1.7));
+assertEquals(s.substring(s.length - 2), s.substr(-2));
+assertEquals(s.substring(s.length - 2), s.substr(-2.3));
+assertEquals(s.substring(s.length - 2, s.length - 1), s.substr(-2, 1));
+assertEquals(s, s.substr(-100));
+assertEquals('abc', s.substr(-100, 3));
+assertEquals(s1, s.substr(-s.length + 1));
+
+// Conversion of the length argument.
+// assertEquals('', s.substr(0, void 0)); // smjs and rhino
+assertEquals('abcdefghijklmn', s.substr(0, void 0)); // kjs and v8
+assertEquals('', s.substr(0, null));
+assertEquals(s, s.substr(0, String(s.length)));
+assertEquals('a', s.substr(0, true));
+
+
+// Test substrings of different lengths and alignments.
+// Every substring is compared character by character with the source string.
+// First ASCII.
+var x = "ASCII";
+for (var i = 0; i < 25; i++) {
+ x += (i >> 4).toString(16) + (i & 0x0f).toString(16);
+}
+/x/.exec(x); // Try to force a flatten.
+for (var i = 5; i < 25; i++) {
+ // NOTE(review): lengths start at 12 here but at 0 in the two-byte loop
+ // below; the reason is not visible in this file - confirm.
+ for (var j = 12; j < 25; j++) {
+ var z = x.substring(i, i+j);
+ var w = Math.random() * 42; // Allocate something new in new-space.
+ assertEquals(j, z.length);
+ for (var k = 0; k < j; k++) {
+ assertEquals(x.charAt(i+k), z.charAt(k));
+ }
+ }
+}
+// Then two-byte strings.
+x = "UC16\u2028"; // Non-ascii char forces two-byte string.
+for (var i = 0; i < 25; i++) {
+ x += (i >> 4).toString(16) + (i & 0x0f).toString(16);
+}
+/x/.exec(x); // Try to force a flatten.
+for (var i = 5; i < 25; i++) {
+ for (var j = 0; j < 25; j++) {
+ var z = x.substring(i, i + j);
+ var w = Math.random() * 42; // Allocate something new in new-space.
+ assertEquals(j, z.length);
+ for (var k = 0; k < j; k++) {
+ assertEquals(x.charAt(i+k), z.charAt(k));
+ }
+ }
+}
+
+// Keep creating strings to force allocation failure on substring creation.
+// All substrings are kept alive in the cache array.
+var x = "0123456789ABCDEF";
+x += x; // 2^5
+x += x;
+x += x;
+x += x;
+x += x;
+x += x; // 2^10
+x += x;
+x += x;
+var xl = x.length;
+var cache = [];
+for (var i = 0; i < 10000; i++) {
+ var z = x.substring(i % xl);
+ assertEquals(xl - (i % xl), z.length);
+ cache.push(z);
+}
+
+
+// Same with two-byte strings
+var x = "\u2028123456789ABCDEF";
+x += x; // 2^5
+x += x;
+x += x;
+x += x;
+x += x;
+x += x; // 2^10
+x += x;
+x += x;
+var xl = x.length;
+var cache = [];
+for (var i = 0; i < 10000; i++) {
+ var z = x.substring(i % xl);
+ assertEquals(xl - (i % xl), z.length);
+ cache.push(z);
+}
+
+// Substring of substring.
+// Each iteration drops i more leading characters from the previous result,
+// so the string pushed in iteration i is shorter than x by 0 + 1 + ... + i.
+var cache = [];
+var last = x;
+var offset = 0;
+for (var i = 0; i < 64; i++) {
+ var z = last.substring(i);
+ last = z;
+ cache.push(z);
+ offset += i;
+}
+// Walk the results back starting with the shortest one: each must still
+// contain the 16-character sequence x was built from and have the expected
+// length. The offset bookkeeping is undone step by step.
+for (var i = 63; i >= 0; i--) {
+ var z = cache.pop();
+ assertTrue(/\u2028123456789ABCDEF/.test(z));
+ assertEquals(xl - offset, z.length);
+ offset -= i;
+}
+
+// Test charAt for different strings.
+// s1 and s2 are expected to hold 'a' + k at index k (97 is the char code of
+// 'a'); s3 is expected to be shifted by one character, i.e. to hold 'b' + k.
+// Only the indices 0..10 are probed (i % 11), plus index 3 of s3, which must
+// be 'e' (char code 101).
+function f(s1, s2, s3, i) {
+ assertEquals(String.fromCharCode(97+i%11), s1.charAt(i%11));
+ assertEquals(String.fromCharCode(97+i%11), s2.charAt(i%11));
+ assertEquals(String.fromCharCode(98+i%11), s3.charAt(i%11));
+ assertEquals(String.fromCharCode(101), s3.charAt(3));
+}
+
+// Call f 1000 times with a string literal, a concatenation starting with
+// that literal, and a slice that starts at index 1 of a similar literal.
+flat = "abcdefghijkl12345";
+cons = flat + flat.toUpperCase();
+slice = "abcdefghijklmn12345".slice(1, -1);
+for ( var i = 0; i < 1000; i++) {
+ f(flat, cons, slice, i);
+}
+// Same again with strings that contain a non-ASCII character.
+flat = "abcdefghijkl1\u20232345";
+cons = flat + flat.toUpperCase();
+slice = "abcdefghijklmn1\u20232345".slice(1, -1);
+for ( var i = 0; i < 1000; i++) {
+ f(flat, cons, slice, i);
+}
+
+// Concatenate substrings.
+// ascii has 16 characters, so ascii.substring(16) is empty. The expected
+// values for utf.substring(5,3) and utf.substring(5,1) are those of
+// substring(3,5) and substring(1,5), i.e. the arguments are swapped when
+// start is greater than end.
+var ascii = 'abcdefghijklmnop';
+var utf = '\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB';
+assertEquals("klmno", ascii.substring(10,15) + ascii.substring(16));
+assertEquals("\u03B4\u03B7", utf.substring(3,4) + utf.substring(6,7));
+assertEquals("klp", ascii.substring(10,12) + ascii.substring(15,16));
+assertEquals("\u03B1\u03B4\u03B5", utf.substring(0,1) + utf.substring(5,3));
+assertEquals("", ascii.substring(16) + utf.substring(16));
+assertEquals("bcdef\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9",
+ ascii.substring(1,6) + utf.substring(3,9));
+assertEquals("\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9abcdefghijklmnop",
+ utf.substring(3,9) + ascii);
+assertEquals("\u03B2\u03B3\u03B4\u03B5\u03B4\u03B5\u03B6\u03B7",
+ utf.substring(5,1) + utf.substring(3,7));
+
+/*
+// Externalizing strings.
+var a = "123456789qwertyuiopasdfghjklzxcvbnm";
+var b = a.slice(1,-1);
+assertEquals(a.slice(1,-1), b);
+externalizeString(a);
+assertEquals(a.slice(1,-1), b);
+*/
\ No newline at end of file
assertEquals(xl - (i % xl), z.length);
cache.push(z);
}
+
+// Substring of substring.
+// x and xl come from the preceding part of the test (not shown in this
+// hunk). Each iteration drops i more leading characters from the previous
+// result, so the string pushed in iteration i is shorter than x by
+// 0 + 1 + ... + i.
+var cache = [];
+var last = x;
+var offset = 0;
+for (var i = 0; i < 64; i++) {
+ var z = last.substring(i);
+ last = z;
+ cache.push(z);
+ offset += i;
+}
+// Walk the results back starting with the shortest one, checking that each
+// still matches the pattern and has the expected length. The offset
+// bookkeeping is undone step by step.
+for (var i = 63; i >= 0; i--) {
+ var z = cache.pop();
+ assertTrue(/\u2028123456789ABCDEF/.test(z));
+ assertEquals(xl - offset, z.length);
+ offset -= i;
+}