From: yangguo@chromium.org Date: Wed, 5 Dec 2012 15:49:22 +0000 (+0000) Subject: Improve array to string conversion. X-Git-Tag: upstream/4.7.83~15493 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c75ca450001137a4e63259d455f9540f872390cf;p=platform%2Fupstream%2Fv8.git Improve array to string conversion. BUG=v8:2435 Review URL: https://chromiumcodereview.appspot.com/11348349 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13144 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc index a2762f8..5e8739c 100644 --- a/src/arm/codegen-arm.cc +++ b/src/arm/codegen-arm.cc @@ -521,6 +521,50 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, } +void SeqStringSetCharGenerator::Generate(MacroAssembler* masm, + String::Encoding encoding, + Register string, + Register index, + Register value) { + if (FLAG_debug_code) { + __ tst(index, Operand(kSmiTagMask)); + __ Check(eq, "Non-smi index"); + __ tst(value, Operand(kSmiTagMask)); + __ Check(eq, "Non-smi value"); + + __ ldr(ip, FieldMemOperand(string, String::kLengthOffset)); + __ cmp(index, ip); + __ Check(lt, "Index is too large"); + + __ cmp(index, Operand(Smi::FromInt(0))); + __ Check(ge, "Index is negative"); + + __ ldr(ip, FieldMemOperand(string, HeapObject::kMapOffset)); + __ ldrb(ip, FieldMemOperand(ip, Map::kInstanceTypeOffset)); + + __ and_(ip, ip, Operand(kStringRepresentationMask | kStringEncodingMask)); + static const uint32_t one_byte_seq_type = kSeqStringTag | kOneByteStringTag; + static const uint32_t two_byte_seq_type = kSeqStringTag | kTwoByteStringTag; + __ cmp(ip, Operand(encoding == String::ONE_BYTE_ENCODING + ? one_byte_seq_type : two_byte_seq_type)); + __ Check(eq, "Unexpected string type"); + } + + __ add(ip, + string, + Operand(SeqString::kHeaderSize - kHeapObjectTag)); + __ SmiUntag(value, value); + STATIC_ASSERT(kSmiTagSize == 1 && kSmiTag == 0); + if (encoding == String::ONE_BYTE_ENCODING) { + // Smis are tagged by left shift by 1, thus LSR by 1 to smi-untag inline. + __ strb(value, MemOperand(ip, index, LSR, 1)); + } else { + // No need to untag a smi for two-byte addressing. + __ strh(value, MemOperand(ip, index)); + } +} + + static MemOperand ExpConstant(int index, Register base) { return MemOperand(base, index * kDoubleSize); } diff --git a/src/arm/full-codegen-arm.cc b/src/arm/full-codegen-arm.cc index b0df846..631e101 100644 --- a/src/arm/full-codegen-arm.cc +++ b/src/arm/full-codegen-arm.cc @@ -3142,6 +3142,39 @@ void FullCodeGenerator::EmitDateField(CallRuntime* expr) { } +void FullCodeGenerator::EmitOneByteSeqStringSetChar(CallRuntime* expr) { + ZoneList* args = expr->arguments(); + ASSERT_EQ(3, args->length()); + + VisitForStackValue(args->at(1)); // index + VisitForStackValue(args->at(2)); // value + __ pop(r2); + __ pop(r1); + VisitForAccumulatorValue(args->at(0)); // string + + static const String::Encoding encoding = String::ONE_BYTE_ENCODING; + SeqStringSetCharGenerator::Generate(masm_, encoding, r0, r1, r2); + context()->Plug(r0); +} + + +void FullCodeGenerator::EmitTwoByteSeqStringSetChar(CallRuntime* expr) { + ZoneList* args = expr->arguments(); + ASSERT_EQ(3, args->length()); + + VisitForStackValue(args->at(1)); // index + VisitForStackValue(args->at(2)); // value + __ pop(r2); + __ pop(r1); + VisitForAccumulatorValue(args->at(0)); // string + + static const String::Encoding encoding = String::TWO_BYTE_ENCODING; + SeqStringSetCharGenerator::Generate(masm_, encoding, r0, r1, r2); + context()->Plug(r0); +} + + + void FullCodeGenerator::EmitMathPow(CallRuntime* expr) { // Load the arguments on the stack and call the runtime function. ZoneList* args = expr->arguments(); diff --git a/src/arm/lithium-arm.cc b/src/arm/lithium-arm.cc index 400e1fc..921489e 100644 --- a/src/arm/lithium-arm.cc +++ b/src/arm/lithium-arm.cc @@ -1647,6 +1647,16 @@ LInstruction* LChunkBuilder::DoDateField(HDateField* instr) { } +LInstruction* LChunkBuilder::DoSeqStringSetChar(HSeqStringSetChar* instr) { + LOperand* string = UseRegister(instr->string()); + LOperand* index = UseRegister(instr->index()); + LOperand* value = UseRegister(instr->value()); + LSeqStringSetChar* result = + new(zone()) LSeqStringSetChar(instr->encoding(), string, index, value); + return DefineAsRegister(result); +} + + LInstruction* LChunkBuilder::DoBoundsCheck(HBoundsCheck* instr) { LOperand* value = UseRegisterOrConstantAtStart(instr->index()); LOperand* length = UseRegister(instr->length()); diff --git a/src/arm/lithium-arm.h b/src/arm/lithium-arm.h index 3a9d10b..1b589ce 100644 --- a/src/arm/lithium-arm.h +++ b/src/arm/lithium-arm.h @@ -150,6 +150,7 @@ class LCodeGen; V(Random) \ V(RegExpLiteral) \ V(Return) \ + V(SeqStringSetChar) \ V(ShiftI) \ V(SmiTag) \ V(SmiUntag) \ @@ -1203,6 +1204,30 @@ class LDateField: public LTemplateInstruction<1, 1, 1> { }; +class LSeqStringSetChar: public LTemplateInstruction<1, 3, 0> { + public: + LSeqStringSetChar(String::Encoding encoding, + LOperand* string, + LOperand* index, + LOperand* value) : encoding_(encoding) { + inputs_[0] = string; + inputs_[1] = index; + inputs_[2] = value; + } + + String::Encoding encoding() { return encoding_; } + LOperand* string() { return inputs_[0]; } + LOperand* index() { return inputs_[1]; } + LOperand* value() { return inputs_[2]; } + + DECLARE_CONCRETE_INSTRUCTION(SeqStringSetChar, "seq-string-set-char") + DECLARE_HYDROGEN_ACCESSOR(SeqStringSetChar) + + private: + String::Encoding encoding_; +}; + + class LThrow: public LTemplateInstruction<0, 1, 0> { public: explicit LThrow(LOperand* value) { diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc index 1c9d0c4..1cfa8e9 100644 --- a/src/arm/lithium-codegen-arm.cc +++ b/src/arm/lithium-codegen-arm.cc @@ -1845,6 +1845,15 @@ void LCodeGen::DoDateField(LDateField* instr) { } +void LCodeGen::DoSeqStringSetChar(LSeqStringSetChar* instr) { + SeqStringSetCharGenerator::Generate(masm(), + instr->encoding(), + ToRegister(instr->string()), + ToRegister(instr->index()), + ToRegister(instr->value())); +} + + void LCodeGen::DoBitNotI(LBitNotI* instr) { Register input = ToRegister(instr->value()); Register result = ToRegister(instr->result()); diff --git a/src/codegen.h b/src/codegen.h index 3d14502..0ac68c2 100644 --- a/src/codegen.h +++ b/src/codegen.h @@ -104,6 +104,19 @@ class ElementsTransitionGenerator : public AllStatic { DISALLOW_COPY_AND_ASSIGN(ElementsTransitionGenerator); }; + +class SeqStringSetCharGenerator : public AllStatic { + public: + static void Generate(MacroAssembler* masm, + String::Encoding encoding, + Register string, + Register index, + Register value); + private: + DISALLOW_COPY_AND_ASSIGN(SeqStringSetCharGenerator); +}; + + } } // namespace v8::internal #endif // V8_CODEGEN_H_ diff --git a/src/hydrogen-instructions.h b/src/hydrogen-instructions.h index 47a9da3..3006734 100644 --- a/src/hydrogen-instructions.h +++ b/src/hydrogen-instructions.h @@ -155,6 +155,7 @@ class LChunkBuilder; V(Return) \ V(Ror) \ V(Sar) \ + V(SeqStringSetChar) \ V(Shl) \ V(Shr) \ V(Simulate) \ @@ -5212,6 +5213,33 @@ class HDateField: public HUnaryOperation { }; +class HSeqStringSetChar: public HTemplateInstruction<3> { + public: + HSeqStringSetChar(String::Encoding encoding, + HValue* string, + HValue* index, + HValue* value) : encoding_(encoding) { + SetOperandAt(0, string); + SetOperandAt(1, index); + SetOperandAt(2, value); + } + + String::Encoding encoding() { return encoding_; } + HValue* string() { return OperandAt(0); } + HValue* index() { return OperandAt(1); } + HValue* value() { return OperandAt(2); } + + virtual Representation RequiredInputRepresentation(int index) { + return Representation::Tagged(); + } + + DECLARE_CONCRETE_INSTRUCTION(SeqStringSetChar) + + private: + String::Encoding encoding_; +}; + + class HDeleteProperty: public HBinaryOperation { public: HDeleteProperty(HValue* context, HValue* obj, HValue* key) diff --git a/src/hydrogen.cc b/src/hydrogen.cc index 4fd14e8..6291dcb 100644 --- a/src/hydrogen.cc +++ b/src/hydrogen.cc @@ -9180,6 +9180,39 @@ void HOptimizedGraphBuilder::GenerateDateField(CallRuntime* call) { } +void HOptimizedGraphBuilder::GenerateOneByteSeqStringSetChar( + CallRuntime* call) { + ASSERT(call->arguments()->length() == 3); + CHECK_ALIVE(VisitForValue(call->arguments()->at(0))); + CHECK_ALIVE(VisitForValue(call->arguments()->at(1))); + CHECK_ALIVE(VisitForValue(call->arguments()->at(2))); + HValue* value = Pop(); + HValue* index = Pop(); + HValue* string = Pop(); + HSeqStringSetChar* result = new(zone()) HSeqStringSetChar( + String::ONE_BYTE_ENCODING, string, index, value); + return ast_context()->ReturnInstruction(result, call->id()); +} + + +void HOptimizedGraphBuilder::GenerateTwoByteSeqStringSetChar( + CallRuntime* call) { + ASSERT(call->arguments()->length() == 3); + CHECK_ALIVE(VisitForValue(call->arguments()->at(0))); + CHECK_ALIVE(VisitForValue(call->arguments()->at(1))); + CHECK_ALIVE(VisitForValue(call->arguments()->at(2))); + HValue* value = Pop(); + HValue* index = Pop(); + HValue* string = Pop(); + HValue* context = environment()->LookupContext(); + HStringCharCodeAt* char_code = BuildStringCharCodeAt(context, string, index); + AddInstruction(char_code); + HSeqStringSetChar* result = new(zone()) HSeqStringSetChar( + String::TWO_BYTE_ENCODING, string, index, value); + return ast_context()->ReturnInstruction(result, call->id()); +} + + void HOptimizedGraphBuilder::GenerateSetValueOf(CallRuntime* call) { ASSERT(call->arguments()->length() == 2); CHECK_ALIVE(VisitForValue(call->arguments()->at(0))); diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc index 2f5553c..9477bf1 100644 --- a/src/ia32/codegen-ia32.cc +++ b/src/ia32/codegen-ia32.cc @@ -793,6 +793,50 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, } +void SeqStringSetCharGenerator::Generate(MacroAssembler* masm, + String::Encoding encoding, + Register string, + Register index, + Register value) { + if (FLAG_debug_code) { + __ test(index, Immediate(kSmiTagMask)); + __ Check(zero, "Non-smi index"); + __ test(value, Immediate(kSmiTagMask)); + __ Check(zero, "Non-smi value"); + + __ cmp(index, FieldOperand(string, String::kLengthOffset)); + __ Check(less, "Index is too large"); + + __ cmp(index, Immediate(Smi::FromInt(0))); + __ Check(greater_equal, "Index is negative"); + + __ push(value); + __ mov(value, FieldOperand(string, HeapObject::kMapOffset)); + __ movzx_b(value, FieldOperand(value, Map::kInstanceTypeOffset)); + + __ and_(value, Immediate(kStringRepresentationMask | kStringEncodingMask)); + static const uint32_t one_byte_seq_type = kSeqStringTag | kOneByteStringTag; + static const uint32_t two_byte_seq_type = kSeqStringTag | kTwoByteStringTag; + __ cmp(value, Immediate(encoding == String::ONE_BYTE_ENCODING + ? one_byte_seq_type : two_byte_seq_type)); + __ Check(equal, "Unexpected string type"); + __ pop(value); + } + + __ SmiUntag(value); + STATIC_ASSERT(kSmiTagSize == 1 && kSmiTag == 0); + if (encoding == String::ONE_BYTE_ENCODING) { + __ SmiUntag(index); + __ mov_b(FieldOperand(string, index, times_1, SeqString::kHeaderSize), + value); + } else { + // No need to untag a smi for two-byte addressing. + __ mov_w(FieldOperand(string, index, times_1, SeqString::kHeaderSize), + value); + } +} + + static Operand ExpConstant(int index) { return Operand::StaticVariable(ExternalReference::math_exp_constants(index)); } diff --git a/src/ia32/full-codegen-ia32.cc b/src/ia32/full-codegen-ia32.cc index 6dfec92..fdafe39 100644 --- a/src/ia32/full-codegen-ia32.cc +++ b/src/ia32/full-codegen-ia32.cc @@ -3074,6 +3074,38 @@ void FullCodeGenerator::EmitDateField(CallRuntime* expr) { } +void FullCodeGenerator::EmitOneByteSeqStringSetChar(CallRuntime* expr) { + ZoneList* args = expr->arguments(); + ASSERT_EQ(3, args->length()); + + VisitForStackValue(args->at(1)); // index + VisitForStackValue(args->at(2)); // value + __ pop(ecx); + __ pop(ebx); + VisitForAccumulatorValue(args->at(0)); // string + + static const String::Encoding encoding = String::ONE_BYTE_ENCODING; + SeqStringSetCharGenerator::Generate(masm_, encoding, eax, ebx, ecx); + context()->Plug(eax); +} + + +void FullCodeGenerator::EmitTwoByteSeqStringSetChar(CallRuntime* expr) { + ZoneList* args = expr->arguments(); + ASSERT_EQ(3, args->length()); + + VisitForStackValue(args->at(1)); // index + VisitForStackValue(args->at(2)); // value + __ pop(ecx); + __ pop(ebx); + VisitForAccumulatorValue(args->at(0)); // string + + static const String::Encoding encoding = String::TWO_BYTE_ENCODING; + SeqStringSetCharGenerator::Generate(masm_, encoding, eax, ebx, ecx); + context()->Plug(eax); +} + + void FullCodeGenerator::EmitMathPow(CallRuntime* expr) { // Load the arguments on the stack and call the runtime function. ZoneList* args = expr->arguments(); diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc index f83c761..ff1ec90 100644 --- a/src/ia32/lithium-codegen-ia32.cc +++ b/src/ia32/lithium-codegen-ia32.cc @@ -1709,6 +1709,15 @@ void LCodeGen::DoDateField(LDateField* instr) { } +void LCodeGen::DoSeqStringSetChar(LSeqStringSetChar* instr) { + SeqStringSetCharGenerator::Generate(masm(), + instr->encoding(), + ToRegister(instr->string()), + ToRegister(instr->index()), + ToRegister(instr->value())); +} + + void LCodeGen::DoBitNotI(LBitNotI* instr) { LOperand* input = instr->value(); ASSERT(input->Equals(instr->result())); diff --git a/src/ia32/lithium-ia32.cc b/src/ia32/lithium-ia32.cc index 098e6ff..63afef6 100644 --- a/src/ia32/lithium-ia32.cc +++ b/src/ia32/lithium-ia32.cc @@ -1660,6 +1660,17 @@ LInstruction* LChunkBuilder::DoDateField(HDateField* instr) { } +LInstruction* LChunkBuilder::DoSeqStringSetChar(HSeqStringSetChar* instr) { + LOperand* string = UseRegister(instr->string()); + LOperand* index = UseRegister(instr->index()); + ASSERT(ecx.is_byte_register()); + LOperand* value = UseFixed(instr->value(), ecx); + LSeqStringSetChar* result = + new(zone()) LSeqStringSetChar(instr->encoding(), string, index, value); + return DefineSameAsFirst(result); +} + + LInstruction* LChunkBuilder::DoBoundsCheck(HBoundsCheck* instr) { return AssignEnvironment(new(zone()) LBoundsCheck( UseRegisterOrConstantAtStart(instr->index()), diff --git a/src/ia32/lithium-ia32.h b/src/ia32/lithium-ia32.h index bec2f81..f4056c1 100644 --- a/src/ia32/lithium-ia32.h +++ b/src/ia32/lithium-ia32.h @@ -144,6 +144,7 @@ class LCodeGen; V(PushArgument) \ V(RegExpLiteral) \ V(Return) \ + V(SeqStringSetChar) \ V(ShiftI) \ V(SmiTag) \ V(SmiUntag) \ @@ -1184,6 +1185,30 @@ class LDateField: public LTemplateInstruction<1, 1, 1> { }; +class LSeqStringSetChar: public LTemplateInstruction<1, 3, 0> { + public: + LSeqStringSetChar(String::Encoding encoding, + LOperand* string, + LOperand* index, + LOperand* value) : encoding_(encoding) { + inputs_[0] = string; + inputs_[1] = index; + inputs_[2] = value; + } + + String::Encoding encoding() { return encoding_; } + LOperand* string() { return inputs_[0]; } + LOperand* index() { return inputs_[1]; } + LOperand* value() { return inputs_[2]; } + + DECLARE_CONCRETE_INSTRUCTION(SeqStringSetChar, "seq-string-set-char") + DECLARE_HYDROGEN_ACCESSOR(SeqStringSetChar) + + private: + String::Encoding encoding_; +}; + + class LThrow: public LTemplateInstruction<0, 2, 0> { public: LThrow(LOperand* context, LOperand* value) { diff --git a/src/json-stringifier.h b/src/json-stringifier.h index 09014f0..7a8af30 100644 --- a/src/json-stringifier.h +++ b/src/json-stringifier.h @@ -597,31 +597,8 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeJSObject( void BasicJsonStringifier::ShrinkCurrentPart() { ASSERT(current_index_ < part_length_); - if (current_index_ == 0) { - current_part_ = factory_->empty_string(); - return; - } - - int string_size, allocated_string_size; - if (is_ascii_) { - allocated_string_size = SeqOneByteString::SizeFor(part_length_); - string_size = SeqOneByteString::SizeFor(current_index_); - } else { - allocated_string_size = SeqTwoByteString::SizeFor(part_length_); - string_size = SeqTwoByteString::SizeFor(current_index_); - } - - int delta = allocated_string_size - string_size; - current_part_->set_length(current_index_); - - // String sizes are pointer size aligned, so that we can use filler objects - // that are a multiple of pointer size. - Address end_of_string = current_part_->address() + string_size; - isolate_->heap()->CreateFillerObjectAt(end_of_string, delta); - if (Marking::IsBlack(Marking::MarkBitFrom(*current_part_))) { - MemoryChunk::IncrementLiveBytesFromMutator( - current_part_->address(), -delta); - } + current_part_ = Handle( + SeqString::cast(*current_part_)->Truncate(current_index_), isolate_); } diff --git a/src/macros.py b/src/macros.py index 08fa82e..f871fc5 100644 --- a/src/macros.py +++ b/src/macros.py @@ -32,6 +32,8 @@ const NONE = 0; const READ_ONLY = 1; const DONT_ENUM = 2; const DONT_DELETE = 4; +const NEW_ONE_BYTE_STRING = true; +const NEW_TWO_BYTE_STRING = false; # Constants used for getter and setter operations. const GETTER = 0; diff --git a/src/objects.cc b/src/objects.cc index 2ef142b..a9b58ab 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -7620,6 +7620,36 @@ bool String::SlowAsArrayIndex(uint32_t* index) { } +String* SeqString::Truncate(int new_length) { + Heap* heap = GetHeap(); + if (new_length <= 0) return heap->empty_string(); + + int string_size, allocated_string_size; + int old_length = length(); + if (old_length <= new_length) return this; + + if (IsSeqOneByteString()) { + allocated_string_size = SeqOneByteString::SizeFor(old_length); + string_size = SeqOneByteString::SizeFor(new_length); + } else { + allocated_string_size = SeqTwoByteString::SizeFor(old_length); + string_size = SeqTwoByteString::SizeFor(new_length); + } + + int delta = allocated_string_size - string_size; + set_length(new_length); + + // String sizes are pointer size aligned, so that we can use filler objects + // that are a multiple of pointer size. + Address end_of_string = address() + string_size; + heap->CreateFillerObjectAt(end_of_string, delta); + if (Marking::IsBlack(Marking::MarkBitFrom(this))) { + MemoryChunk::IncrementLiveBytesFromMutator(address(), -delta); + } + return this; +} + + uint32_t StringHasher::MakeArrayIndexHash(uint32_t value, int length) { // For array indexes mix the length into the hash as an array index could // be zero. diff --git a/src/objects.h b/src/objects.h index eace7b7..7d25e9c 100644 --- a/src/objects.h +++ b/src/objects.h @@ -7173,6 +7173,8 @@ class StringShape BASE_EMBEDDED { // All string values have a length field. class String: public HeapObject { public: + enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING }; + // Representation of the flat content of a String. // A non-flat string doesn't have flat content. // A flat string has content that's encoded as a sequence of either @@ -7569,6 +7571,11 @@ class SeqString: public String { // Layout description. static const int kHeaderSize = String::kSize; + // Truncate the string in-place if possible and return the result. + // In case of new_length == 0, the empty string is returned without + // truncating the original string. + MUST_USE_RESULT String* Truncate(int new_length); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(SeqString); }; diff --git a/src/runtime.cc b/src/runtime.cc index e78086d..9f5e78d 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -5101,46 +5101,22 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToNumber) { } -RUNTIME_FUNCTION(MaybeObject*, Runtime_StringFromCharCodeArray) { - NoHandleAllocation ha; - ASSERT(args.length() == 1); - - CONVERT_ARG_CHECKED(JSArray, codes, 0); - int length = Smi::cast(codes->length())->value(); - - // Check if the string can be ASCII. - int i; - for (i = 0; i < length; i++) { - Object* element; - { MaybeObject* maybe_element = codes->GetElement(i); - // We probably can't get an exception here, but just in order to enforce - // the checking of inputs in the runtime calls we check here. - if (!maybe_element->ToObject(&element)) return maybe_element; - } - CONVERT_NUMBER_CHECKED(int, chr, Int32, element); - if ((chr & 0xffff) > String::kMaxAsciiCharCode) - break; +RUNTIME_FUNCTION(MaybeObject*, Runtime_NewString) { + CONVERT_SMI_ARG_CHECKED(length, 0); + CONVERT_BOOLEAN_ARG_CHECKED(is_one_byte, 1); + if (length == 0) return isolate->heap()->empty_string(); + if (is_one_byte) { + return isolate->heap()->AllocateRawOneByteString(length); + } else { + return isolate->heap()->AllocateRawTwoByteString(length); } +} - MaybeObject* maybe_object = NULL; - if (i == length) { // The string is ASCII. - maybe_object = isolate->heap()->AllocateRawOneByteString(length); - } else { // The string is not ASCII. - maybe_object = isolate->heap()->AllocateRawTwoByteString(length); - } - Object* object = NULL; - if (!maybe_object->ToObject(&object)) return maybe_object; - String* result = String::cast(object); - for (int i = 0; i < length; i++) { - Object* element; - { MaybeObject* maybe_element = codes->GetElement(i); - if (!maybe_element->ToObject(&element)) return maybe_element; - } - CONVERT_NUMBER_CHECKED(int, chr, Int32, element); - result->Set(i, chr & 0xffff); - } - return result; +RUNTIME_FUNCTION(MaybeObject*, Runtime_TruncateString) { + CONVERT_ARG_CHECKED(SeqString, string, 0); + CONVERT_SMI_ARG_CHECKED(new_length, 1); + return string->Truncate(new_length); } diff --git a/src/runtime.h b/src/runtime.h index a52dfb8..7a21bb9 100644 --- a/src/runtime.h +++ b/src/runtime.h @@ -114,7 +114,6 @@ namespace internal { F(Typeof, 1, 1) \ \ F(StringToNumber, 1, 1) \ - F(StringFromCharCodeArray, 1, 1) \ F(StringParseInt, 2, 1) \ F(StringParseFloat, 1, 1) \ F(StringToLowerCase, 1, 1) \ @@ -123,10 +122,6 @@ namespace internal { F(CharFromCode, 1, 1) \ F(URIEscape, 1, 1) \ F(URIUnescape, 1, 1) \ - F(BasicJSONStringify, 1, 1) \ - F(QuoteJSONString, 1, 1) \ - F(QuoteJSONStringComma, 1, 1) \ - F(QuoteJSONStringArray, 1, 1) \ \ F(NumberToString, 1, 1) \ F(NumberToStringSkipCache, 1, 1) \ @@ -195,6 +190,10 @@ namespace internal { \ /* JSON */ \ F(ParseJson, 1, 1) \ + F(BasicJSONStringify, 1, 1) \ + F(QuoteJSONString, 1, 1) \ + F(QuoteJSONStringComma, 1, 1) \ + F(QuoteJSONStringArray, 1, 1) \ \ /* Strings */ \ F(StringCharCodeAt, 2, 1) \ @@ -208,6 +207,8 @@ namespace internal { F(StringTrim, 3, 1) \ F(StringToArray, 2, 1) \ F(NewStringWrapper, 1, 1) \ + F(NewString, 2, 1) \ + F(TruncateString, 2, 1) \ \ /* Numbers */ \ F(NumberToRadixString, 2, 1) \ @@ -530,6 +531,8 @@ namespace internal { F(DateField, 2 /* date object, field index */, 1) \ F(StringCharFromCode, 1, 1) \ F(StringCharAt, 2, 1) \ + F(OneByteSeqStringSetChar, 3, 1) \ + F(TwoByteSeqStringSetChar, 3, 1) \ F(ObjectEquals, 2, 1) \ F(RandomHeapNumber, 0, 1) \ F(IsObject, 1, 1) \ diff --git a/src/string.js b/src/string.js index 1f38194..badfad3 100644 --- a/src/string.js +++ b/src/string.js @@ -810,6 +810,7 @@ function StringTrimRight() { var static_charcode_array = new InternalArray(4); + // ECMA-262, section 15.5.3.2 function StringFromCharCode(code) { var n = %_ArgumentsLength(); @@ -818,17 +819,24 @@ function StringFromCharCode(code) { return %_StringCharFromCode(code & 0xffff); } - // NOTE: This is not super-efficient, but it is necessary because we - // want to avoid converting to numbers from within the virtual - // machine. Maybe we can find another way of doing this? - var codes = static_charcode_array; - for (var i = 0; i < n; i++) { + var one_byte = %NewString(n, NEW_ONE_BYTE_STRING); + var i; + for (i = 0; i < n; i++) { var code = %_Arguments(i); - if (!%_IsSmi(code)) code = ToNumber(code); - codes[i] = code; + if (!%_IsSmi(code)) code = ToNumber(code) & 0xffff; + if (code > 0x7f) break; + %_OneByteSeqStringSetChar(one_byte, i, code); + } + if (i == n) return one_byte; + one_byte = %TruncateString(one_byte, i); + + var two_byte = %NewString(n - i, NEW_TWO_BYTE_STRING); + for (var j = 0; i < n; i++, j++) { + var code = %_Arguments(i); + if (!%_IsSmi(code)) code = ToNumber(code) & 0xffff; + %_TwoByteSeqStringSetChar(two_byte, j, code); } - codes.length = n; - return %StringFromCharCodeArray(codes); + return one_byte + two_byte; } diff --git a/src/uri.js b/src/uri.js index b195f3d..1de22f8 100644 --- a/src/uri.js +++ b/src/uri.js @@ -165,11 +165,11 @@ function URIDecodeOctets(octets, result, index) { throw new $URIError("URI malformed"); } if (value < 0x10000) { - result[index++] = value; + %_TwoByteSeqStringSetChar(result, index++, value); return index; } else { - result[index++] = (value >> 10) + 0xd7c0; - result[index++] = (value & 0x3ff) + 0xdc00; + %_TwoByteSeqStringSetChar(result, index++, (value >> 10) + 0xd7c0); + %_TwoByteSeqStringSetChar(result, index++, (value & 0x3ff) + 0xdc00); return index; } } @@ -178,43 +178,72 @@ function URIDecodeOctets(octets, result, index) { // ECMA-262, section 15.1.3 function Encode(uri, unescape) { var uriLength = uri.length; - // We are going to pass result to %StringFromCharCodeArray - // which does not expect any getters/setters installed - // on the incoming array. - var result = new InternalArray(uriLength); + var array = new InternalArray(uriLength); var index = 0; for (var k = 0; k < uriLength; k++) { var cc1 = uri.charCodeAt(k); if (unescape(cc1)) { - result[index++] = cc1; + array[index++] = cc1; } else { if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed"); if (cc1 < 0xD800 || cc1 > 0xDBFF) { - index = URIEncodeSingle(cc1, result, index); + index = URIEncodeSingle(cc1, array, index); } else { k++; if (k == uriLength) throw new $URIError("URI malformed"); var cc2 = uri.charCodeAt(k); if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed"); - index = URIEncodePair(cc1, cc2, result, index); + index = URIEncodePair(cc1, cc2, array, index); } } } - return %StringFromCharCodeArray(result); + + var result = %NewString(array.length, NEW_ONE_BYTE_STRING); + for (var i = 0; i < array.length; i++) { + %_OneByteSeqStringSetChar(result, i, array[i]); + } + return result; } // ECMA-262, section 15.1.3 function Decode(uri, reserved) { var uriLength = uri.length; - // We are going to pass result to %StringFromCharCodeArray - // which does not expect any getters/setters installed - // on the incoming array. - var result = new InternalArray(uriLength); + var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING); var index = 0; - for (var k = 0; k < uriLength; k++) { - var ch = uri.charAt(k); - if (ch == '%') { + var k = 0; + + // Optimistically assume ascii string. + for ( ; k < uriLength; k++) { + var code = uri.charCodeAt(k); + if (code == 37) { // '%' + if (k + 2 >= uriLength) throw new $URIError("URI malformed"); + var cc = URIHexCharsToCharCode(uri.charCodeAt(k+1), uri.charCodeAt(k+2)); + if (cc >> 7) break; // Assumption wrong, two byte string. + if (reserved(cc)) { + %_OneByteSeqStringSetChar(one_byte, index++, 37); // '%'. + %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+1)); + %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+2)); + } else { + %_OneByteSeqStringSetChar(one_byte, index++, cc); + } + k += 2; + } else { + if (code > 0x7f) break; // Assumption wrong, two byte string. + %_OneByteSeqStringSetChar(one_byte, index++, code); + } + } + + one_byte = %TruncateString(one_byte, index); + if (k == uriLength) return one_byte; + + // Write into two byte string. + var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING); + index = 0; + + for ( ; k < uriLength; k++) { + var code = uri.charCodeAt(k); + if (code == 37) { // '%' if (k + 2 >= uriLength) throw new $URIError("URI malformed"); var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k)); if (cc >> 7) { @@ -229,22 +258,21 @@ function Decode(uri, reserved) { octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k)); } - index = URIDecodeOctets(octets, result, index); + index = URIDecodeOctets(octets, two_byte, index); + } else if (reserved(cc)) { + %_TwoByteSeqStringSetChar(two_byte, index++, 37); // '%'. + %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k - 1)); + %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k)); } else { - if (reserved(cc)) { - result[index++] = 37; // Char code of '%'. - result[index++] = uri.charCodeAt(k - 1); - result[index++] = uri.charCodeAt(k); - } else { - result[index++] = cc; - } + %_TwoByteSeqStringSetChar(two_byte, index++, cc); } } else { - result[index++] = ch.charCodeAt(0); + %_TwoByteSeqStringSetChar(two_byte, index++, code); } } - result.length = index; - return %StringFromCharCodeArray(result); + + two_byte = %TruncateString(two_byte, index); + return one_byte + two_byte; } diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc index 67d1d90..7954604 100644 --- a/src/x64/codegen-x64.cc +++ b/src/x64/codegen-x64.cc @@ -606,6 +606,46 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, } +void SeqStringSetCharGenerator::Generate(MacroAssembler* masm, + String::Encoding encoding, + Register string, + Register index, + Register value) { + if (FLAG_debug_code) { + __ Check(masm->CheckSmi(index), "Non-smi index"); + __ Check(masm->CheckSmi(value), "Non-smi value"); + + __ SmiCompare(index, FieldOperand(string, String::kLengthOffset)); + __ Check(less, "Index is too large"); + + __ SmiCompare(index, Smi::FromInt(0)); + __ Check(greater_equal, "Index is negative"); + + __ push(value); + __ movq(value, FieldOperand(string, HeapObject::kMapOffset)); + __ movzxbq(value, FieldOperand(value, Map::kInstanceTypeOffset)); + + __ andb(value, Immediate(kStringRepresentationMask | kStringEncodingMask)); + static const uint32_t one_byte_seq_type = kSeqStringTag | kOneByteStringTag; + static const uint32_t two_byte_seq_type = kSeqStringTag | kTwoByteStringTag; + __ cmpq(value, Immediate(encoding == String::ONE_BYTE_ENCODING + ? one_byte_seq_type : two_byte_seq_type)); + __ Check(equal, "Unexpected string type"); + __ pop(value); + } + + __ SmiToInteger32(value, value); + __ SmiToInteger32(index, index); + if (encoding == String::ONE_BYTE_ENCODING) { + __ movb(FieldOperand(string, index, times_1, SeqString::kHeaderSize), + value); + } else { + __ movw(FieldOperand(string, index, times_2, SeqString::kHeaderSize), + value); + } +} + + void MathExpGenerator::EmitMathExp(MacroAssembler* masm, XMMRegister input, XMMRegister result, diff --git a/src/x64/full-codegen-x64.cc b/src/x64/full-codegen-x64.cc index 650337a..c87155f 100644 --- a/src/x64/full-codegen-x64.cc +++ b/src/x64/full-codegen-x64.cc @@ -3047,6 +3047,38 @@ void FullCodeGenerator::EmitDateField(CallRuntime* expr) { } +void FullCodeGenerator::EmitOneByteSeqStringSetChar(CallRuntime* expr) { + ZoneList* args = expr->arguments(); + ASSERT_EQ(3, args->length()); + + VisitForStackValue(args->at(1)); // index + VisitForStackValue(args->at(2)); // value + __ pop(rcx); + __ pop(rbx); + VisitForAccumulatorValue(args->at(0)); // string + + static const String::Encoding encoding = String::ONE_BYTE_ENCODING; + SeqStringSetCharGenerator::Generate(masm_, encoding, rax, rbx, rcx); + context()->Plug(rax); +} + + +void FullCodeGenerator::EmitTwoByteSeqStringSetChar(CallRuntime* expr) { + ZoneList* args = expr->arguments(); + ASSERT_EQ(3, args->length()); + + VisitForStackValue(args->at(1)); // index + VisitForStackValue(args->at(2)); // value + __ pop(rcx); + __ pop(rbx); + VisitForAccumulatorValue(args->at(0)); // string + + static const String::Encoding encoding = String::TWO_BYTE_ENCODING; + SeqStringSetCharGenerator::Generate(masm_, encoding, rax, rbx, rcx); + context()->Plug(rax); +} + + void FullCodeGenerator::EmitMathPow(CallRuntime* expr) { // Load the arguments on the stack and call the runtime function. ZoneList* args = expr->arguments(); diff --git a/src/x64/lithium-codegen-x64.cc b/src/x64/lithium-codegen-x64.cc index 3f01d3b..0e5c99b 100644 --- a/src/x64/lithium-codegen-x64.cc +++ b/src/x64/lithium-codegen-x64.cc @@ -1533,6 +1533,15 @@ void LCodeGen::DoDateField(LDateField* instr) { } +void LCodeGen::DoSeqStringSetChar(LSeqStringSetChar* instr) { + SeqStringSetCharGenerator::Generate(masm(), + instr->encoding(), + ToRegister(instr->string()), + ToRegister(instr->index()), + ToRegister(instr->value())); +} + + void LCodeGen::DoBitNotI(LBitNotI* instr) { LOperand* input = instr->value(); ASSERT(input->Equals(instr->result())); diff --git a/src/x64/lithium-x64.cc b/src/x64/lithium-x64.cc index defdafa..919d074 100644 --- a/src/x64/lithium-x64.cc +++ b/src/x64/lithium-x64.cc @@ -1583,6 +1583,17 @@ LInstruction* LChunkBuilder::DoDateField(HDateField* instr) { } +LInstruction* LChunkBuilder::DoSeqStringSetChar(HSeqStringSetChar* instr) { + LOperand* string = UseRegister(instr->string()); + LOperand* index = UseRegister(instr->index()); + ASSERT(rcx.is_byte_register()); + LOperand* value = UseFixed(instr->value(), rcx); + LSeqStringSetChar* result = + new(zone()) LSeqStringSetChar(instr->encoding(), string, index, value); + return DefineSameAsFirst(result); +} + + LInstruction* LChunkBuilder::DoBoundsCheck(HBoundsCheck* instr) { LOperand* value = UseRegisterOrConstantAtStart(instr->index()); LOperand* length = Use(instr->length()); diff --git a/src/x64/lithium-x64.h b/src/x64/lithium-x64.h index b37d2ac..f5f0250 100644 --- a/src/x64/lithium-x64.h +++ b/src/x64/lithium-x64.h @@ -149,6 +149,7 @@ class LCodeGen; V(Random) \ V(RegExpLiteral) \ V(Return) \ + V(SeqStringSetChar) \ V(ShiftI) \ V(SmiTag) \ V(SmiUntag) \ @@ -1160,6 +1161,30 @@ class LDateField: public LTemplateInstruction<1, 1, 0> { }; +class LSeqStringSetChar: public LTemplateInstruction<1, 3, 0> { + public: + LSeqStringSetChar(String::Encoding encoding, + LOperand* string, + LOperand* index, + LOperand* value) : encoding_(encoding) { + inputs_[0] = string; + inputs_[1] = index; + inputs_[2] = value; + } + + String::Encoding encoding() { return encoding_; } + LOperand* string() { return inputs_[0]; } + LOperand* index() { return inputs_[1]; } + LOperand* value() { return inputs_[2]; } + + DECLARE_CONCRETE_INSTRUCTION(SeqStringSetChar, "seq-string-set-char") + DECLARE_HYDROGEN_ACCESSOR(SeqStringSetChar) + + private: + String::Encoding encoding_; +}; + + class LThrow: public LTemplateInstruction<0, 1, 0> { public: explicit LThrow(LOperand* value) { diff --git a/test/mjsunit/fuzz-natives-part1.js b/test/mjsunit/fuzz-natives-part1.js index 29a2abb..f459126 100644 --- a/test/mjsunit/fuzz-natives-part1.js +++ b/test/mjsunit/fuzz-natives-part1.js @@ -198,6 +198,8 @@ var knownProblems = { // Only applicable to strings. "_HasCachedArrayIndex": true, "_GetCachedArrayIndex": true, + "_OneByteSeqStringSetChar": true, + "_TwoByteSeqStringSetChar": true, // Only for debugging parallel recompilation. "InstallRecompiledCode": true, diff --git a/test/mjsunit/fuzz-natives-part2.js b/test/mjsunit/fuzz-natives-part2.js index 6fb8df1..2faad1d 100644 --- a/test/mjsunit/fuzz-natives-part2.js +++ b/test/mjsunit/fuzz-natives-part2.js @@ -196,7 +196,13 @@ var knownProblems = { // Only applicable to strings. "_HasCachedArrayIndex": true, - "_GetCachedArrayIndex": true + "_GetCachedArrayIndex": true, + "_OneByteSeqStringSetChar": true, + "_TwoByteSeqStringSetChar": true, + + // Only for debugging parallel recompilation. + "InstallRecompiledCode": true, + "ForceParallelRecompile": true }; var currentlyUncallable = { diff --git a/test/mjsunit/fuzz-natives-part3.js b/test/mjsunit/fuzz-natives-part3.js index df9bb7e..ed71d33 100644 --- a/test/mjsunit/fuzz-natives-part3.js +++ b/test/mjsunit/fuzz-natives-part3.js @@ -196,7 +196,13 @@ var knownProblems = { // Only applicable to strings. "_HasCachedArrayIndex": true, - "_GetCachedArrayIndex": true + "_GetCachedArrayIndex": true, + "_OneByteSeqStringSetChar": true, + "_TwoByteSeqStringSetChar": true, + + // Only for debugging parallel recompilation. + "InstallRecompiledCode": true, + "ForceParallelRecompile": true }; var currentlyUncallable = { diff --git a/test/mjsunit/fuzz-natives-part4.js b/test/mjsunit/fuzz-natives-part4.js index c340619..1b128d5 100644 --- a/test/mjsunit/fuzz-natives-part4.js +++ b/test/mjsunit/fuzz-natives-part4.js @@ -196,7 +196,13 @@ var knownProblems = { // Only applicable to strings. "_HasCachedArrayIndex": true, - "_GetCachedArrayIndex": true + "_GetCachedArrayIndex": true, + "_OneByteSeqStringSetChar": true, + "_TwoByteSeqStringSetChar": true, + + // Only for debugging parallel recompilation. + "InstallRecompiledCode": true, + "ForceParallelRecompile": true }; var currentlyUncallable = { diff --git a/test/mjsunit/regress/regress-json-stringify-gc.js b/test/mjsunit/regress/regress-json-stringify-gc.js index d732ebc..c0a71bf 100644 --- a/test/mjsunit/regress/regress-json-stringify-gc.js +++ b/test/mjsunit/regress/regress-json-stringify-gc.js @@ -37,5 +37,5 @@ for (var i = 0; i < 10000; i++) a.push(new_space_string); // screw up reading from the correct location. json1 = JSON.stringify(a); json2 = JSON.stringify(a); -assertEquals(json1, json2, "GC caused JSON.stringify to fail."); +assertTrue(json1 == json2, "GC caused JSON.stringify to fail."); diff --git a/test/mjsunit/string-natives.js b/test/mjsunit/string-natives.js new file mode 100644 index 0000000..b1ec875 --- /dev/null +++ b/test/mjsunit/string-natives.js @@ -0,0 +1,72 @@ +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --expose-gc --allow-natives-syntax + +function test() { + var s1 = %NewString(26, true); + for (i = 0; i < 26; i++) %_OneByteSeqStringSetChar(s1, i, i+65); + assertEquals("ABCDEFGHIJKLMNOPQRSTUVWXYZ", s1); + s1 = %TruncateString(s1, 13); + assertEquals("ABCDEFGHIJKLM", s1); + + var s2 = %NewString(26, false); + for (i = 0; i < 26; i++) %_TwoByteSeqStringSetChar(s2, i, i+65); + assertEquals("ABCDEFGHIJKLMNOPQRSTUVWXYZ", s2); + s2 = %TruncateString(s1, 13); + assertEquals("ABCDEFGHIJKLM", s2); + + var s3 = %NewString(26, false); + for (i = 0; i < 26; i++) %_TwoByteSeqStringSetChar(s3, i, i+1000); + for (i = 0; i < 26; i++) assertEquals(s3[i], String.fromCharCode(i+1000)); + + var a = []; + for (var i = 0; i < 1000; i++) { + var s = %NewString(10000, i % 2 == 1); + a.push(s); + } + + gc(); + + for (var i = 0; i < 1000; i++) { + assertEquals(10000, a[i].length); + a[i] = %TruncateString(a[i], 5000); + } + + gc(); + + for (var i = 0; i < 1000; i++) { + assertEquals(5000, a[i].length); + } +} + + +test(); +test(); +%OptimizeFunctionOnNextCall(test); +test(); + diff --git a/test/mjsunit/uri.js b/test/mjsunit/uri.js index 178ff1f..fae349f 100644 --- a/test/mjsunit/uri.js +++ b/test/mjsunit/uri.js @@ -76,3 +76,15 @@ assertEquals(cc8_2, decodeURI(encodeURI(s8)).charCodeAt(1)); assertEquals(cc9_1, decodeURI(encodeURI(s9)).charCodeAt(0)); assertEquals(cc9_2, decodeURI(encodeURI(s9)).charCodeAt(1)); assertEquals(cc10, decodeURI(encodeURI(s10)).charCodeAt(0)); + +assertEquals("", decodeURI("")); +assertEquals("", encodeURI("")); + +function test(string) { + assertEquals(string, decodeURI(encodeURI(string))); +} + +test("\u1234\u0123\uabcd"); +test("abcd"); +test("ab<\u1234\u0123"); +test("ab\u1234<\u0123");