From: Jean Perier Date: Tue, 28 May 2019 09:15:05 +0000 (-0700) Subject: [flang] Fix UTF-8 bugs and add related tests X-Git-Tag: 2020.06-alpha~50^2~2673^2~1155 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bc30bef24b8cb724b634bf650741ea01e7730748;p=platform%2Fupstream%2Fllvm.git [flang] Fix UTF-8 bugs and add related tests Original-commit: flang-compiler/f18@9dd19ede9ed76570653d698bd1261950e74dda4b Reviewed-on: https://github.com/flang-compiler/f18/pull/471 Tree-same-pre-rewrite: false --- diff --git a/flang/lib/parser/characters.cc b/flang/lib/parser/characters.cc index 5b8348f5b142..e92c1d1a40aa 100644 --- a/flang/lib/parser/characters.cc +++ b/flang/lib/parser/characters.cc @@ -88,8 +88,15 @@ std::string QuoteCharacterLiteralHelper( std::string result{'"'}; const auto emit{[&](char ch) { result += ch; }}; for (auto ch : str) { - char32_t ch32{static_cast<unsigned char>(ch)}; - EmitQuotedChar(ch32, emit, emit, doubleDoubleQuotes, doubleBackslash); + using CharT = std::decay_t<decltype(ch)>; + if constexpr (std::is_same_v<char, CharT>) { + // char may be signed depending on host. 
+ char32_t ch32{static_cast<unsigned char>(ch)}; + EmitQuotedChar(ch32, emit, emit, doubleDoubleQuotes, doubleBackslash); + } else { + char32_t ch32{ch}; + EmitQuotedChar(ch32, emit, emit, doubleDoubleQuotes, doubleBackslash); + } } result += '"'; return result; @@ -136,6 +143,7 @@ std::optional<std::u32string> DecodeUTF8(const std::string &s) { return std::nullopt; // not valid UTF-8 } } + result.append(1, ch); bytes -= charBytes; } return {result}; diff --git a/flang/lib/parser/prescan.cc b/flang/lib/parser/prescan.cc index 4ccb32ad047e..049cc7f249b5 100644 --- a/flang/lib/parser/prescan.cc +++ b/flang/lib/parser/prescan.cc @@ -545,7 +545,7 @@ void Prescanner::QuotedCharacterLiteral( bool escape{false}; bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)}; while (true) { - char ch{*at_}; + unsigned char ch{static_cast<unsigned char>(*at_)}; escape = !escape && ch == '\\' && escapesEnabled; EmitQuotedChar(ch, emit, insert, false, !escapesEnabled); while (PadOutCharacterLiteral(tokens)) { diff --git a/flang/test/semantics/CMakeLists.txt b/flang/test/semantics/CMakeLists.txt index 36c10e2071fe..dc64f5e767d2 100644 --- a/flang/test/semantics/CMakeLists.txt +++ b/flang/test/semantics/CMakeLists.txt @@ -175,6 +175,7 @@ set(MODFILE_TESTS modfile25.f90 modfile26.f90 modfile27.f90 + modfile28.f90 ) set(LABEL_TESTS diff --git a/flang/test/semantics/modfile28.f90 b/flang/test/semantics/modfile28.f90 new file mode 100644 index 000000000000..fa07ebf1fadc --- /dev/null +++ b/flang/test/semantics/modfile28.f90 @@ -0,0 +1,34 @@ + +! Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +! +! Licensed under the Apache License, Version 2.0 (the "License"); +! you may not use this file except in compliance with the License. +! You may obtain a copy of the License at +! +! http://www.apache.org/licenses/LICENSE-2.0 +! +! Unless required by applicable law or agreed to in writing, software +! distributed under the License is distributed on an "AS IS" BASIS, +! 
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +! See the License for the specific language governing permissions and +! limitations under the License. + +! Test UTF-8 support in character literals +! TODO: test EUC-JP + +module m +character(kind=4,len=:), parameter :: c4 = 4_"Hi! 你好!" +character(kind=1,len=:), parameter :: c1 = 1_"Hi! 你好!" +character(kind=4,len=:), parameter :: c4a(:) = [4_"一", 4_"二", 4_"三", 4_"四", 4_"五"] +integer, parameter :: lc4 = len(c4) +integer, parameter :: lc1 = len(c1) +end module m + +!Expect: m.mod +!module m +!character(:,4),parameter::c4=4_"Hi! 你好!" +!character(:,1),parameter::c1=1_"Hi! \344\275\240\345\245\275!" +!character(:,4),parameter::c4a(1_8:)=[CHARACTER(KIND=4,LEN=1)::"一","二","三","四","五"] +!integer(4),parameter::lc4=7_4 +!integer(4),parameter::lc1=11_4 +!end