From: Ilona Tomkowicz <32700855+ilonatommy@users.noreply.github.com> Date: Wed, 3 May 2023 12:47:16 +0000 (+0200) Subject: [browser][non-icu] `HybridGlobalization` faster encoding for change case. (#85516) X-Git-Tag: accepted/tizen/unified/riscv/20231226.055536~2481 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e15f07357c9af3df2ce55afec87a191d6ac8c137;p=platform%2Fupstream%2Fdotnet%2Fruntime.git [browser][non-icu] `HybridGlobalization` faster encoding for change case. (#85516) * This speeds it up ~twice. * JS's decoder always changes the codepoint. * @kg's optimization idea. --- diff --git a/src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs b/src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs index 11b4dd5..4fba5e1 100644 --- a/src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs +++ b/src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs @@ -325,12 +325,14 @@ namespace System.Globalization.Tests [Fact] public void ToLower_InvalidSurrogates() { + bool usesTextDecoder = PlatformDetection.IsHybridGlobalizationOnBrowser && PlatformDetection.IsBrowserDomSupportedOrNodeJS; + // Invalid UTF-16 in a string (mismatched surrogate pairs) should be unchanged. foreach (string cultureName in new string[] { "", "en-US", "fr" }) { - ToLower(cultureName, "BE CAREFUL, \uD83C\uD83C, THIS ONE IS TRICKY", "be careful, \uD83C\uD83C, this one is tricky"); - ToLower(cultureName, "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY", "be careful, \uDF08\uD83C, this one is tricky"); - ToLower(cultureName, "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY", "be careful, \uDF08\uDF08, this one is tricky"); + ToLower(cultureName, "\uD83C\uD83C", usesTextDecoder ? "\uFFFD\uFFFD" : "\uD83C\uD83C"); + ToLower(cultureName, "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY", usesTextDecoder ? "be careful, \uFFFD\uFFFD, this one is tricky" : "be careful, \uDF08\uD83C, this one is tricky"); + ToLower(cultureName, "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY", usesTextDecoder ? "be careful, \uFFFD\uFFFD, this one is tricky" : "be careful, \uDF08\uDF08, this one is tricky"); } } @@ -452,12 +454,14 @@ namespace System.Globalization.Tests [Fact] public void ToUpper_InvalidSurrogates() { + bool usesTextDecoder = PlatformDetection.IsHybridGlobalizationOnBrowser && PlatformDetection.IsBrowserDomSupportedOrNodeJS; + // Invalid UTF-16 in a string (mismatched surrogate pairs) should be unchanged. foreach (string cultureName in new string[] { "", "en-US", "fr"}) { - ToUpper(cultureName, "be careful, \uD83C\uD83C, this one is tricky", "BE CAREFUL, \uD83C\uD83C, THIS ONE IS TRICKY"); - ToUpper(cultureName, "be careful, \uDF08\uD83C, this one is tricky", "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY"); - ToUpper(cultureName, "be careful, \uDF08\uDF08, this one is tricky", "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY"); + ToUpper(cultureName, "be careful, \uD83C\uD83C, this one is tricky", usesTextDecoder ? "BE CAREFUL, \uFFFD\uFFFD, THIS ONE IS TRICKY" : "BE CAREFUL, \uD83C\uD83C, THIS ONE IS TRICKY"); + ToUpper(cultureName, "be careful, \uDF08\uD83C, this one is tricky", usesTextDecoder ? "BE CAREFUL, \uFFFD\uFFFD, THIS ONE IS TRICKY" : "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY"); + ToUpper(cultureName, "be careful, \uDF08\uDF08, this one is tricky", usesTextDecoder ? "BE CAREFUL, \uFFFD\uFFFD, THIS ONE IS TRICKY" : "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY"); } } diff --git a/src/mono/wasm/runtime/hybrid-globalization.ts b/src/mono/wasm/runtime/hybrid-globalization.ts index 62ecac2..66cc3eb 100644 --- a/src/mono/wasm/runtime/hybrid-globalization.ts +++ b/src/mono/wasm/runtime/hybrid-globalization.ts @@ -1,24 +1,23 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -import { Module } from "./globals"; import { mono_wasm_new_external_root } from "./roots"; import { MonoString, MonoStringRef } from "./types"; import { Int32Ptr } from "./types/emscripten"; import { conv_string_root, js_string_to_mono_string_root, string_decoder } from "./strings"; -import { setU16 } from "./memory"; +import { setU16_unchecked } from "./memory"; -export function mono_wasm_change_case_invariant(exceptionMessage: Int32Ptr, src: number, srcLength: number, dst: number, dstLength: number, toUpper: number): void { - try { - const input = get_utf16_string(src, srcLength); +export function mono_wasm_change_case_invariant(exceptionMessage: Int32Ptr, src: number, srcLength: number, dst: number, dstLength: number, toUpper: number) : void{ + try{ + const input = string_decoder.decode(src, (src + 2*srcLength)); let result = toUpper ? input.toUpperCase() : input.toLowerCase(); // Unicode defines some codepoints which expand into multiple codepoints, // originally we do not support this expansion if (result.length > dstLength) result = input; - for (let i = 0; i < result.length; i++) - setU16(dst + i * 2, result.charCodeAt(i)); + for (let i = 0, j = dst; i < result.length; i++, j += 2) + setU16_unchecked(j, result.charCodeAt(i)); } catch (ex: any) { pass_exception_details(ex, exceptionMessage); @@ -31,13 +30,13 @@ export function mono_wasm_change_case(exceptionMessage: Int32Ptr, culture: MonoS const cultureName = conv_string_root(cultureRoot); if (!cultureName) throw new Error("Cannot change case, the culture name is null."); - const input = get_utf16_string(src, srcLength); + const input = string_decoder.decode(src, (src + 2*srcLength)); let result = toUpper ? input.toLocaleUpperCase(cultureName) : input.toLocaleLowerCase(cultureName); if (result.length > destLength) result = input; - for (let i = 0; i < destLength; i++) - setU16(dst + i * 2, result.charCodeAt(i)); + for (let i = 0, j = dst; i < result.length; i++, j += 2) + setU16_unchecked(j, result.charCodeAt(i)); } catch (ex: any) { pass_exception_details(ex, exceptionMessage); @@ -47,14 +46,6 @@ export function mono_wasm_change_case(exceptionMessage: Int32Ptr, culture: MonoS } } -function get_utf16_string(ptr: number, length: number): string { - const view = new Uint16Array(Module.HEAPU16.buffer, ptr, length); - let string = ""; - for (let i = 0; i < length; i++) - string += String.fromCharCode(view[i]); - return string; -} - export function mono_wasm_compare_string(exceptionMessage: Int32Ptr, culture: MonoStringRef, str1: number, str1Length: number, str2: number, str2Length: number, options: number): number { const cultureRoot = mono_wasm_new_external_root(culture); try { diff --git a/src/mono/wasm/runtime/memory.ts b/src/mono/wasm/runtime/memory.ts index dc3c433..5ab35b8 100644 --- a/src/mono/wasm/runtime/memory.ts +++ b/src/mono/wasm/runtime/memory.ts @@ -2,10 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. import monoWasmThreads from "consts:monoWasmThreads"; -import { Module, runtimeHelpers } from "./globals"; import { mono_assert, MemOffset, NumberOrPointer } from "./types"; import { VoidPtr, CharPtr } from "./types/emscripten"; import cwraps, { I52Error } from "./cwraps"; +import { Module, runtimeHelpers } from "./globals"; const alloca_stack: Array = []; const alloca_buffer_size = 32 * 1024; @@ -73,6 +73,10 @@ export function setU16(offset: MemOffset, value: number): void { Module.HEAPU16[offset >>> 1] = value; } +export function setU16_unchecked(offset: MemOffset, value: number): void { + Module.HEAPU16[offset >>> 1] = value; +} + export function setU32_unchecked(offset: MemOffset, value: NumberOrPointer): void { Module.HEAPU32[offset >>> 2] = value; }