From ab7b081f9095d09bb72e97a1c4aafcdeb2640624 Mon Sep 17 00:00:00 2001 From: Jakub Skowron Date: Wed, 18 Jan 2017 09:10:23 +0100 Subject: [PATCH] [Filesystem] Fix SMP support in UTF-8 encoding on write Support supplementary planes, including Emoji. [Verification] readAsText() is working correctly for P170111-07658 issue. Change-Id: I6f75f397e167ea9c89b4298edcaa19352586c90d Signed-off-by: Jakub Skowron --- src/filesystem/js/base64.js | 42 +++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/src/filesystem/js/base64.js b/src/filesystem/js/base64.js index 034ce333..d2868f3e 100755 --- a/src/filesystem/js/base64.js +++ b/src/filesystem/js/base64.js @@ -16,13 +16,8 @@ var Base64 = { _b64: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=', - encodeString: function(data) { - data = this._utf8_encode(data); - var input = []; - for (var i = 0; i < data.length; ++i) { - input.push(data.charCodeAt(i)); - } - return this.encode(input); + encodeString: function(str) { + return this.encode(this._utf8_encode(str)); }, encode: function(data) { var output = ''; @@ -90,28 +85,29 @@ var Base64 = { }, _utf8_encode: function(str) { str = str.replace(/\r\n/g, '\n'); - var utftext = ''; - - for (var n = 0; n < str.length; n++) { + var utfarray = []; - var c = str.charCodeAt(n); + //TODO: use for( var c of str ) in future versions + for (var offset = 0; offset < str.length; offset++) { + var code = str.codePointAt(offset); - if (c < 128) { - utftext += String.fromCharCode(c); + if (code <= 0x7F) { + utfarray.push(code); } - else if ((c > 127) && (c < 2048)) { - utftext += String.fromCharCode((c >> 6) | 192); - utftext += String.fromCharCode((c & 63) | 128); + else if (code <= 0x7FF) { + utfarray.push( 0xC0 | (code >> 6), 0x80 | (code & 0x3F) ); + } + else if (code <= 0xFFFF) { + utfarray.push( 0xE0 | (code >> 12), 0x80 | ((code >> 6) & 0x3F), 0x80 | (code & 0x3F) ); } else { - utftext += String.fromCharCode((c >> 12) | 
224); - utftext += String.fromCharCode(((c >> 6) & 63) | 128); - utftext += String.fromCharCode((c & 63) | 128); + utfarray.push( 0xF0 | (code >> 18), 0x80 | ((code >> 12) & 0x3F), + 0x80 | ((code >> 6) & 0x3F), 0x80 | (code & 0x3F) ); + offset++; //there is a UTF16 surrogate pair in str, so jump two elements } - } - return utftext; + return utfarray; }, _utf8_decode: function(utfarray) { var str = ''; @@ -125,12 +121,12 @@ var Base64 = { str += String.fromCharCode(c); i++; } - else if ((c > 191) && (c < 224)) { + else if ((c >= 192) && (c < 224)) { c1 = utfarray[i + 1]; str += String.fromCharCode(((c & 31) << 6) | (c1 & 63)); i += 2; } - else if((c > 223) && (c < 240)) { + else if((c >= 224) && (c < 240)) { c1 = utfarray[i + 1]; c2 = utfarray[i + 2]; str += String.fromCharCode(((c & 15) << 12) | ((c1 & 63) << 6) | (c2 & 63)); -- 2.34.1