Fix raw TemplateLiteral spans with non-ASCII characters
author caitpotter88 <caitpotter88@gmail.com>
Wed, 26 Nov 2014 17:15:47 +0000 (09:15 -0800)
committer Commit bot <commit-bot@chromium.org>
Wed, 26 Nov 2014 17:15:54 +0000 (17:15 +0000)
BUG=v8:3710

Review URL: https://codereview.chromium.org/745233002

Cr-Commit-Position: refs/heads/master@{#25529}

src/parser.cc
test/mjsunit/harmony/templates.js

index 159fee8..c2d6c57 100644 (file)
@@ -5334,9 +5334,23 @@ ZoneList<Expression*>* Parser::TemplateRawStrings(const TemplateLiteral* lit,
       raw_chars[to_index++] = ch;
     }
 
-    const AstRawString* raw_str = ast_value_factory()->GetOneByteString(
-        OneByteVector(raw_chars.get(), to_index));
-    Literal* raw_lit = factory()->NewStringLiteral(raw_str, span_start - 1);
+    Access<UnicodeCache::Utf8Decoder>
+        decoder(isolate()->unicode_cache()->utf8_decoder());
+    decoder->Reset(raw_chars.get(), to_index);
+    int utf16_length = decoder->Utf16Length();
+    Literal* raw_lit = NULL;
+    if (utf16_length > 0) {
+      uc16* utf16_buffer = zone()->NewArray<uc16>(utf16_length);
+      to_index = decoder->WriteUtf16(utf16_buffer, utf16_length);
+      const uint16_t* data = reinterpret_cast<const uint16_t*>(utf16_buffer);
+      const AstRawString* raw_str = ast_value_factory()->GetTwoByteString(
+          Vector<const uint16_t>(data, to_index));
+      raw_lit = factory()->NewStringLiteral(raw_str, span_start - 1);
+    } else {
+      raw_lit = factory()->NewStringLiteral(
+          ast_value_factory()->empty_string(), span_start - 1);
+    }
+    DCHECK_NOT_NULL(raw_lit);
     raw_strings->Add(raw_lit, zone());
   }
 
index 3baf05e..ee37d82 100644 (file)
@@ -401,9 +401,21 @@ var obj = {
   assertEquals("안녕", callSites[0][0]);
   assertEquals("\\uc548\\ub155", callSites[0].raw[0]);
   assertEquals("안녕", callSites[1][0]);
-  // TODO(caitp, arv): blocked on correctly generating raw strings from
-  // multi-byte UTF8.
-  // assertEquals("안녕", callSites[1].raw[0]);
+  assertEquals("안녕", callSites[1].raw[0]);
+
+  // Extra-thorough UTF8 decoding test.
+  callSites = [];
+
+  tag`Iñtërnâtiônàlizætiøn\u2603\uD83D\uDCA9`;
+  tag`Iñtërnâtiônàlizætiøn☃💩`;
+
+  assertEquals(2, callSites.length);
+  assertTrue(callSites[0] !== callSites[1]);
+  assertEquals("Iñtërnâtiônàlizætiøn☃💩", callSites[0][0]);
+  assertEquals(
+      "Iñtërnâtiônàlizætiøn\\u2603\\uD83D\\uDCA9", callSites[0].raw[0]);
+  assertEquals("Iñtërnâtiônàlizætiøn☃💩", callSites[1][0]);
+  assertEquals("Iñtërnâtiônàlizætiøn☃💩", callSites[1].raw[0]);
 })();