From 06536894506de7b510df99d9fbae7aa5b83d2191 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Wed, 30 Jun 2010 07:40:40 +0000 Subject: [PATCH] Fix Chromium issue 47824. In rare cases a two-byte string was mistaken for an ascii-string. Review URL: http://codereview.chromium.org/2858033 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4985 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/handles.cc | 10 +++ src/handles.h | 6 ++ src/jsregexp.cc | 18 ++++- src/objects.cc | 2 +- src/regexp-macro-assembler.cc | 4 +- src/runtime.cc | 40 +++++++--- test/cctest/test-api.cc | 171 +++++++++++++++++++++++++++++++----------- 7 files changed, 188 insertions(+), 63 deletions(-) diff --git a/src/handles.cc b/src/handles.cc index c90365c..f2adab7 100644 --- a/src/handles.cc +++ b/src/handles.cc @@ -197,7 +197,17 @@ void TransformToFastProperties(Handle object, void FlattenString(Handle string) { CALL_HEAP_FUNCTION_VOID(string->TryFlatten()); +} + + +Handle FlattenGetString(Handle string) { + Handle result; + CALL_AND_RETRY(string->TryFlatten(), + { result = Handle(String::cast(__object__)); + break; }, + return Handle()); ASSERT(string->IsFlat()); + return result; } diff --git a/src/handles.h b/src/handles.h index 96b17a6..1e14daf 100644 --- a/src/handles.h +++ b/src/handles.h @@ -193,8 +193,14 @@ void NormalizeProperties(Handle object, void NormalizeElements(Handle object); void TransformToFastProperties(Handle object, int unused_property_fields); + +// Flattens a string. void FlattenString(Handle str); +// Flattens a string and returns the underlying external or sequential +// string. +Handle FlattenGetString(Handle str); + Handle SetProperty(Handle object, Handle key, Handle value, diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 3e9c5ea..9f98782 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -356,7 +356,16 @@ int RegExpImpl::IrregexpPrepare(Handle regexp, if (!subject->IsFlat()) { FlattenString(subject); } - bool is_ascii = subject->IsAsciiRepresentation(); + // Check the asciiness of the underlying storage. + bool is_ascii; + { + AssertNoAllocation no_gc; + String* sequential_string = *subject; + if (subject->IsConsString()) { + sequential_string = ConsString::cast(*subject)->first(); + } + is_ascii = sequential_string->IsAsciiRepresentation(); + } if (!EnsureCompiledIrregexp(regexp, is_ascii)) { return -1; } @@ -381,6 +390,11 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle regexp, ASSERT(index <= subject->length()); ASSERT(subject->IsFlat()); + // A flat ASCII string might have a two-byte first part. + if (subject->IsConsString()) { + subject = Handle(ConsString::cast(*subject)->first()); + } + #ifndef V8_INTERPRETED_REGEXP ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); @@ -407,7 +421,7 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle regexp, // If result is RETRY, the string has changed representation, and we // must restart from scratch. // In this case, it means we must make sure we are prepared to handle - // the, potentially, differen subject (the string can switch between + // the, potentially, different subject (the string can switch between // being internal and external, and even between being ASCII and UC16, // but the characters are always the same). IrregexpPrepare(regexp, subject); diff --git a/src/objects.cc b/src/objects.cc index 5a057e1..883b28e 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -678,7 +678,7 @@ Object* String::SlowTryFlatten(PretenureFlag pretenure) { bool String::MakeExternal(v8::String::ExternalStringResource* resource) { - // Externalizing twice leaks the external resouce, so it's + // Externalizing twice leaks the external resource, so it's // prohibited by the API. ASSERT(!this->IsExternalString()); #ifdef DEBUG diff --git a/src/regexp-macro-assembler.cc b/src/regexp-macro-assembler.cc index fc65947..09797ca 100644 --- a/src/regexp-macro-assembler.cc +++ b/src/regexp-macro-assembler.cc @@ -120,8 +120,6 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match( int start_offset = previous_index; int end_offset = subject_ptr->length(); - bool is_ascii = subject->IsAsciiRepresentation(); - // The string has been flattened, so it it is a cons string it contains the // full string in the first part. if (StringShape(subject_ptr).IsCons()) { @@ -129,7 +127,7 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match( subject_ptr = ConsString::cast(subject_ptr)->first(); } // Ensure that an underlying string has the same ascii-ness. - ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii); + bool is_ascii = subject_ptr->IsAsciiRepresentation(); ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString()); // String is now either Sequential or External int char_size_shift = is_ascii ? 0 : 1; diff --git a/src/runtime.cc b/src/runtime.cc index a0f26fc..22e80b3 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2782,13 +2782,17 @@ int Runtime::StringMatch(Handle sub, // algorithm is unnecessary overhead. if (pattern_length == 1) { AssertNoAllocation no_heap_allocation; // ensure vectors stay valid - if (sub->IsAsciiRepresentation()) { + String* seq_sub = *sub; + if (seq_sub->IsConsString()) { + seq_sub = ConsString::cast(seq_sub)->first(); + } + if (seq_sub->IsAsciiRepresentation()) { uc16 pchar = pat->Get(0); if (pchar > String::kMaxAsciiCharCode) { return -1; } Vector ascii_vector = - sub->ToAsciiVector().SubVector(start_index, subject_length); + seq_sub->ToAsciiVector().SubVector(start_index, subject_length); const void* pos = memchr(ascii_vector.start(), static_cast(pchar), static_cast(ascii_vector.length())); @@ -2798,7 +2802,9 @@ int Runtime::StringMatch(Handle sub, return static_cast(reinterpret_cast(pos) - ascii_vector.start() + start_index); } - return SingleCharIndexOf(sub->ToUC16Vector(), pat->Get(0), start_index); + return SingleCharIndexOf(seq_sub->ToUC16Vector(), + pat->Get(0), + start_index); } if (!pat->IsFlat()) { @@ -2806,19 +2812,29 @@ int Runtime::StringMatch(Handle sub, } AssertNoAllocation no_heap_allocation; // ensure vectors stay valid + // Extract flattened substrings of cons strings before determining asciiness. + String* seq_sub = *sub; + if (seq_sub->IsConsString()) { + seq_sub = ConsString::cast(seq_sub)->first(); + } + String* seq_pat = *pat; + if (seq_pat->IsConsString()) { + seq_pat = ConsString::cast(seq_pat)->first(); + } + // dispatch on type of strings - if (pat->IsAsciiRepresentation()) { - Vector pat_vector = pat->ToAsciiVector(); - if (sub->IsAsciiRepresentation()) { - return StringSearch(sub->ToAsciiVector(), pat_vector, start_index); + if (seq_pat->IsAsciiRepresentation()) { + Vector pat_vector = seq_pat->ToAsciiVector(); + if (seq_sub->IsAsciiRepresentation()) { + return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index); } - return StringSearch(sub->ToUC16Vector(), pat_vector, start_index); + return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index); } - Vector pat_vector = pat->ToUC16Vector(); - if (sub->IsAsciiRepresentation()) { - return StringSearch(sub->ToAsciiVector(), pat_vector, start_index); + Vector pat_vector = seq_pat->ToUC16Vector(); + if (seq_sub->IsAsciiRepresentation()) { + return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index); } - return StringSearch(sub->ToUC16Vector(), pat_vector, start_index); + return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index); } diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc index 0cf3f7b..be5ecba 100644 --- a/test/cctest/test-api.cc +++ b/test/cctest/test-api.cc @@ -58,7 +58,7 @@ using ::v8::Function; using ::v8::AccessorInfo; using ::v8::Extension; -namespace i = ::v8::internal; +namespace i = ::i; static void ExpectString(const char* code, const char* expected) { @@ -381,11 +381,11 @@ THREADED_TEST(ScriptUsingStringResource) { CHECK(source->IsExternal()); CHECK_EQ(resource, static_cast(source->GetExternalStringResource())); - v8::internal::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); CHECK_EQ(0, TestResource::dispose_count); } - v8::internal::CompilationCache::Clear(); - v8::internal::Heap::CollectAllGarbage(false); + i::CompilationCache::Clear(); + i::Heap::CollectAllGarbage(false); CHECK_EQ(1, TestResource::dispose_count); } @@ -402,11 +402,11 @@ THREADED_TEST(ScriptUsingAsciiStringResource) { Local value = script->Run(); CHECK(value->IsNumber()); CHECK_EQ(7, value->Int32Value()); - v8::internal::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); CHECK_EQ(0, TestAsciiResource::dispose_count); } - v8::internal::CompilationCache::Clear(); - v8::internal::Heap::CollectAllGarbage(false); + i::CompilationCache::Clear(); + i::Heap::CollectAllGarbage(false); CHECK_EQ(1, TestAsciiResource::dispose_count); } @@ -427,11 +427,11 @@ THREADED_TEST(ScriptMakingExternalString) { Local value = script->Run(); CHECK(value->IsNumber()); CHECK_EQ(7, value->Int32Value()); - v8::internal::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); CHECK_EQ(0, TestResource::dispose_count); } - v8::internal::CompilationCache::Clear(); - v8::internal::Heap::CollectAllGarbage(false); + i::CompilationCache::Clear(); + i::Heap::CollectAllGarbage(false); CHECK_EQ(1, TestResource::dispose_count); } @@ -453,11 +453,11 @@ THREADED_TEST(ScriptMakingExternalAsciiString) { Local value = script->Run(); CHECK(value->IsNumber()); CHECK_EQ(7, value->Int32Value()); - v8::internal::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); CHECK_EQ(0, TestAsciiResource::dispose_count); } - v8::internal::CompilationCache::Clear(); - v8::internal::Heap::CollectAllGarbage(false); + i::CompilationCache::Clear(); + i::Heap::CollectAllGarbage(false); CHECK_EQ(1, TestAsciiResource::dispose_count); } @@ -645,11 +645,11 @@ TEST(ExternalStringWithDisposeHandling) { Local value = script->Run(); CHECK(value->IsNumber()); CHECK_EQ(7, value->Int32Value()); - v8::internal::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); CHECK_EQ(0, TestAsciiResource::dispose_count); } - v8::internal::CompilationCache::Clear(); - v8::internal::Heap::CollectAllGarbage(false); + i::CompilationCache::Clear(); + i::Heap::CollectAllGarbage(false); CHECK_EQ(1, TestAsciiResourceWithDisposeControl::dispose_calls); CHECK_EQ(0, TestAsciiResource::dispose_count); @@ -666,11 +666,11 @@ TEST(ExternalStringWithDisposeHandling) { Local value = script->Run(); CHECK(value->IsNumber()); CHECK_EQ(7, value->Int32Value()); - v8::internal::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); CHECK_EQ(0, TestAsciiResource::dispose_count); } - v8::internal::CompilationCache::Clear(); - v8::internal::Heap::CollectAllGarbage(false); + i::CompilationCache::Clear(); + i::Heap::CollectAllGarbage(false); CHECK_EQ(1, TestAsciiResourceWithDisposeControl::dispose_calls); CHECK_EQ(1, TestAsciiResource::dispose_count); } @@ -708,7 +708,7 @@ THREADED_TEST(StringConcat) { CHECK(value->IsNumber()); CHECK_EQ(68, value->Int32Value()); } - v8::internal::CompilationCache::Clear(); + i::CompilationCache::Clear(); i::Heap::CollectAllGarbage(false); i::Heap::CollectAllGarbage(false); } @@ -1881,7 +1881,7 @@ static const char* js_code_causing_out_of_memory = // that come after them so they cannot run in parallel. TEST(OutOfMemory) { // It's not possible to read a snapshot into a heap with different dimensions. - if (v8::internal::Snapshot::IsEnabled()) return; + if (i::Snapshot::IsEnabled()) return; // Set heap limits. static const int K = 1024; v8::ResourceConstraints constraints; @@ -1922,7 +1922,7 @@ v8::Handle ProvokeOutOfMemory(const v8::Arguments& args) { TEST(OutOfMemoryNested) { // It's not possible to read a snapshot into a heap with different dimensions. - if (v8::internal::Snapshot::IsEnabled()) return; + if (i::Snapshot::IsEnabled()) return; // Set heap limits. static const int K = 1024; v8::ResourceConstraints constraints; @@ -1951,7 +1951,7 @@ TEST(OutOfMemoryNested) { TEST(HugeConsStringOutOfMemory) { // It's not possible to read a snapshot into a heap with different dimensions. - if (v8::internal::Snapshot::IsEnabled()) return; + if (i::Snapshot::IsEnabled()) return; v8::HandleScope scope; LocalContext context; // Set heap limits. @@ -6811,7 +6811,7 @@ static v8::Handle InterceptorCallICFastApi(Local name, int* call_count = reinterpret_cast(v8::External::Unwrap(info.Data())); ++(*call_count); if ((*call_count) % 20 == 0) { - v8::internal::Heap::CollectAllGarbage(true); + i::Heap::CollectAllGarbage(true); } return v8::Handle(); } @@ -7620,8 +7620,8 @@ THREADED_TEST(ObjectProtoToString) { bool ApiTestFuzzer::fuzzing_ = false; -v8::internal::Semaphore* ApiTestFuzzer::all_tests_done_= - v8::internal::OS::CreateSemaphore(0); +i::Semaphore* ApiTestFuzzer::all_tests_done_= + i::OS::CreateSemaphore(0); int ApiTestFuzzer::active_tests_; int ApiTestFuzzer::tests_being_run_; int ApiTestFuzzer::current_; @@ -7899,7 +7899,7 @@ THREADED_TEST(LockUnlockLock) { static int GetGlobalObjectsCount() { int count = 0; - v8::internal::HeapIterator it; + i::HeapIterator it; for (i::HeapObject* object = it.next(); object != NULL; object = it.next()) if (object->IsJSGlobalObject()) count++; return count; @@ -7912,11 +7912,11 @@ static int GetSurvivingGlobalObjectsCount() { // the first garbage collection but some of the maps have already // been marked at that point. Therefore some of the maps are not // collected until the second garbage collection. - v8::internal::Heap::CollectAllGarbage(false); - v8::internal::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); int count = GetGlobalObjectsCount(); #ifdef DEBUG - if (count > 0) v8::internal::Heap::TracePathToGlobal(); + if (count > 0) i::Heap::TracePathToGlobal(); #endif return count; } @@ -10021,7 +10021,7 @@ static void ExternalArrayTestHelper(v8::ExternalArrayType array_type, THREADED_TEST(ExternalByteArray) { - ExternalArrayTestHelper( + ExternalArrayTestHelper( v8::kExternalByteArray, -128, 127); @@ -10029,7 +10029,7 @@ THREADED_TEST(ExternalByteArray) { THREADED_TEST(ExternalUnsignedByteArray) { - ExternalArrayTestHelper( + ExternalArrayTestHelper( v8::kExternalUnsignedByteArray, 0, 255); @@ -10037,7 +10037,7 @@ THREADED_TEST(ExternalUnsignedByteArray) { THREADED_TEST(ExternalShortArray) { - ExternalArrayTestHelper( + ExternalArrayTestHelper( v8::kExternalShortArray, -32768, 32767); @@ -10045,7 +10045,7 @@ THREADED_TEST(ExternalShortArray) { THREADED_TEST(ExternalUnsignedShortArray) { - ExternalArrayTestHelper( + ExternalArrayTestHelper( v8::kExternalUnsignedShortArray, 0, 65535); @@ -10053,7 +10053,7 @@ THREADED_TEST(ExternalUnsignedShortArray) { THREADED_TEST(ExternalIntArray) { - ExternalArrayTestHelper( + ExternalArrayTestHelper( v8::kExternalIntArray, INT_MIN, // -2147483648 INT_MAX); // 2147483647 @@ -10061,7 +10061,7 @@ THREADED_TEST(ExternalIntArray) { THREADED_TEST(ExternalUnsignedIntArray) { - ExternalArrayTestHelper( + ExternalArrayTestHelper( v8::kExternalUnsignedIntArray, 0, UINT_MAX); // 4294967295 @@ -10069,7 +10069,7 @@ THREADED_TEST(ExternalUnsignedIntArray) { THREADED_TEST(ExternalFloatArray) { - ExternalArrayTestHelper( + ExternalArrayTestHelper( v8::kExternalFloatArray, -500, 500); @@ -10547,7 +10547,7 @@ TEST(Regress528) { other_context->Enter(); CompileRun(source_simple); other_context->Exit(); - v8::internal::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); if (GetGlobalObjectsCount() == 1) break; } CHECK_GE(2, gc_count); @@ -10569,7 +10569,7 @@ TEST(Regress528) { other_context->Enter(); CompileRun(source_eval); other_context->Exit(); - v8::internal::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); if (GetGlobalObjectsCount() == 1) break; } CHECK_GE(2, gc_count); @@ -10596,7 +10596,7 @@ TEST(Regress528) { other_context->Enter(); CompileRun(source_exception); other_context->Exit(); - v8::internal::Heap::CollectAllGarbage(false); + i::Heap::CollectAllGarbage(false); if (GetGlobalObjectsCount() == 1) break; } CHECK_GE(2, gc_count); @@ -10859,7 +10859,7 @@ THREADED_TEST(AddToJSFunctionResultCache) { " return 'Different results for ' + key1 + ': ' + r1 + ' vs. ' + r1_;" " return 'PASSED';" "})()"; - v8::internal::Heap::ClearJSFunctionResultCaches(); + i::Heap::ClearJSFunctionResultCaches(); ExpectString(code, "PASSED"); } @@ -10883,7 +10883,7 @@ THREADED_TEST(FillJSFunctionResultCache) { " return 'FAILED: k0CacheSize is too small';" " return 'PASSED';" "})()"; - v8::internal::Heap::ClearJSFunctionResultCaches(); + i::Heap::ClearJSFunctionResultCaches(); ExpectString(code, "PASSED"); } @@ -10908,7 +10908,7 @@ THREADED_TEST(RoundRobinGetFromCache) { " };" " return 'PASSED';" "})()"; - v8::internal::Heap::ClearJSFunctionResultCaches(); + i::Heap::ClearJSFunctionResultCaches(); ExpectString(code, "PASSED"); } @@ -10933,7 +10933,7 @@ THREADED_TEST(ReverseGetFromCache) { " };" " return 'PASSED';" "})()"; - v8::internal::Heap::ClearJSFunctionResultCaches(); + i::Heap::ClearJSFunctionResultCaches(); ExpectString(code, "PASSED"); } @@ -10951,6 +10951,87 @@ THREADED_TEST(TestEviction) { " };" " return 'PASSED';" "})()"; - v8::internal::Heap::ClearJSFunctionResultCaches(); + i::Heap::ClearJSFunctionResultCaches(); ExpectString(code, "PASSED"); } + + +THREADED_TEST(TwoByteStringInAsciiCons) { + // See Chromium issue 47824. + v8::HandleScope scope; + + LocalContext context; + const char* init_code = + "var str1 = 'abelspendabel';" + "var str2 = str1 + str1 + str1;" + "str2;"; + Local result = CompileRun(init_code); + + CHECK(result->IsString()); + i::Handle string = v8::Utils::OpenHandle(String::Cast(*result)); + int length = string->length(); + CHECK(string->IsAsciiRepresentation()); + + FlattenString(string); + i::Handle flat_string = FlattenGetString(string); + + CHECK(string->IsAsciiRepresentation()); + CHECK(flat_string->IsAsciiRepresentation()); + + // Create external resource. + uint16_t* uc16_buffer = new uint16_t[length + 1]; + + i::String::WriteToFlat(*flat_string, uc16_buffer, 0, length); + uc16_buffer[length] = 0; + + TestResource resource(uc16_buffer); + + flat_string->MakeExternal(&resource); + + CHECK(flat_string->IsTwoByteRepresentation()); + + // At this point, we should have a Cons string which is flat and ASCII, + // with a first half that is a two-byte string (although it only contains + // ASCII characters). This is a valid sequence of steps, and it can happen + // in real pages. + + CHECK(string->IsAsciiRepresentation()); + i::ConsString* cons = i::ConsString::cast(*string); + CHECK_EQ(0, cons->second()->length()); + CHECK(cons->first()->IsTwoByteRepresentation()); + + // Check that some string operations work. + + // Atom RegExp. + Local reresult = CompileRun("str2.match(/abel/g).length;"); + CHECK_EQ(6, reresult->Int32Value()); + + // Nonatom RegExp. + reresult = CompileRun("str2.match(/abe./g).length;"); + CHECK_EQ(6, reresult->Int32Value()); + + reresult = CompileRun("str2.search(/bel/g);"); + CHECK_EQ(1, reresult->Int32Value()); + + reresult = CompileRun("str2.search(/be./g);"); + CHECK_EQ(1, reresult->Int32Value()); + + ExpectTrue("/bel/g.test(str2);"); + + ExpectTrue("/be./g.test(str2);"); + + reresult = CompileRun("/bel/g.exec(str2);"); + CHECK(!reresult->IsNull()); + + reresult = CompileRun("/be./g.exec(str2);"); + CHECK(!reresult->IsNull()); + + ExpectString("str2.substring(2, 10);", "elspenda"); + + ExpectString("str2.substring(2, 20);", "elspendabelabelspe"); + + ExpectString("str2.charAt(2);", "e"); + + reresult = CompileRun("str2.charCodeAt(2);"); + CHECK_EQ(static_cast('e'), reresult->Int32Value()); +} -- 2.7.4