src/base/strings/utf_string_conversions.cc

   1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/utf_string_conversions.h"
   6
   7 #include "base/strings/string_piece.h"
   8 #include "base/strings/string_util.h"
   9 #include "base/strings/utf_string_conversion_utils.h"
  10
  11 namespace base {
  12
  13 namespace {
  14
  15 // Generalized Unicode converter -----------------------------------------------
  16
  17 // Converts the given source Unicode character type to the given destination
  18 // Unicode character type as a STL string. The given input buffer and size
  19 // determine the source, and the given output STL string will be replaced by
  20 // the result.
  21 template<typename SRC_CHAR, typename DEST_STRING>
  22 bool ConvertUnicode(const SRC_CHAR* src,
  23                     size_t src_len,
  24                     DEST_STRING* output) {
  25   // ICU requires 32-bit numbers.
  26   bool success = true;
  27   int32 src_len32 = static_cast<int32>(src_len);
  28   for (int32 i = 0; i < src_len32; i++) {
  29     uint32 code_point;
  30     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
  31       WriteUnicodeCharacter(code_point, output);
  32     } else {
  33       WriteUnicodeCharacter(0xFFFD, output);
  34       success = false;
  35     }
  36   }
  37
  38   return success;
  39 }
  40
  41 }  // namespace
  42
  43 // UTF-8 <-> Wide --------------------------------------------------------------
  44
  45 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
  46   if (IsStringASCII(std::wstring(src, src_len))) {
  47     output->assign(src, src + src_len);
  48     return true;
  49   } else {
  50     PrepareForUTF8Output(src, src_len, output);
  51     return ConvertUnicode(src, src_len, output);
  52   }
  53 }
  54
  55 std::string WideToUTF8(const std::wstring& wide) {
  56   if (IsStringASCII(wide)) {
  57     return std::string(wide.data(), wide.data() + wide.length());
  58   }
  59
  60   std::string ret;
  61   PrepareForUTF8Output(wide.data(), wide.length(), &ret);
  62   ConvertUnicode(wide.data(), wide.length(), &ret);
  63   return ret;
  64 }
  65
  66 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
  67   if (IsStringASCII(StringPiece(src, src_len))) {
  68     output->assign(src, src + src_len);
  69     return true;
  70   } else {
  71     PrepareForUTF16Or32Output(src, src_len, output);
  72     return ConvertUnicode(src, src_len, output);
  73   }
  74 }
  75
  76 std::wstring UTF8ToWide(const StringPiece& utf8) {
  77   if (IsStringASCII(utf8)) {
  78     return std::wstring(utf8.begin(), utf8.end());
  79   }
  80
  81   std::wstring ret;
  82   PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
  83   ConvertUnicode(utf8.data(), utf8.length(), &ret);
  84   return ret;
  85 }
  86
  87 // UTF-16 <-> Wide -------------------------------------------------------------
  88
  89 #if defined(WCHAR_T_IS_UTF16)
  90
  91 // When wide == UTF-16, then conversions are a NOP.
  92 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
  93   output->assign(src, src_len);
  94   return true;
  95 }
  96
  97 string16 WideToUTF16(const std::wstring& wide) {
  98   return wide;
  99 }
 100
 101 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
 102   output->assign(src, src_len);
 103   return true;
 104 }
 105
 106 std::wstring UTF16ToWide(const string16& utf16) {
 107   return utf16;
 108 }
 109
 110 #elif defined(WCHAR_T_IS_UTF32)
 111
 112 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
 113   output->clear();
 114   // Assume that normally we won't have any non-BMP characters so the counts
 115   // will be the same.
 116   output->reserve(src_len);
 117   return ConvertUnicode(src, src_len, output);
 118 }
 119
 120 string16 WideToUTF16(const std::wstring& wide) {
 121   string16 ret;
 122   WideToUTF16(wide.data(), wide.length(), &ret);
 123   return ret;
 124 }
 125
 126 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
 127   output->clear();
 128   // Assume that normally we won't have any non-BMP characters so the counts
 129   // will be the same.
 130   output->reserve(src_len);
 131   return ConvertUnicode(src, src_len, output);
 132 }
 133
 134 std::wstring UTF16ToWide(const string16& utf16) {
 135   std::wstring ret;
 136   UTF16ToWide(utf16.data(), utf16.length(), &ret);
 137   return ret;
 138 }
 139
 140 #endif  // defined(WCHAR_T_IS_UTF32)
 141
 142 // UTF16 <-> UTF8 --------------------------------------------------------------
 143
 144 #if defined(WCHAR_T_IS_UTF32)
 145
 146 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
 147   if (IsStringASCII(StringPiece(src, src_len))) {
 148     output->assign(src, src + src_len);
 149     return true;
 150   } else {
 151     PrepareForUTF16Or32Output(src, src_len, output);
 152     return ConvertUnicode(src, src_len, output);
 153   }
 154 }
 155
 156 string16 UTF8ToUTF16(const StringPiece& utf8) {
 157   if (IsStringASCII(utf8)) {
 158     return string16(utf8.begin(), utf8.end());
 159   }
 160
 161   string16 ret;
 162   PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
 163   // Ignore the success flag of this call, it will do the best it can for
 164   // invalid input, which is what we want here.
 165   ConvertUnicode(utf8.data(), utf8.length(), &ret);
 166   return ret;
 167 }
 168
 169 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
 170   if (IsStringASCII(StringPiece16(src, src_len))) {
 171     output->assign(src, src + src_len);
 172     return true;
 173   } else {
 174     PrepareForUTF8Output(src, src_len, output);
 175     return ConvertUnicode(src, src_len, output);
 176   }
 177 }
 178
 179 std::string UTF16ToUTF8(const string16& utf16) {
 180   if (IsStringASCII(utf16)) {
 181     return std::string(utf16.begin(), utf16.end());
 182   }
 183
 184   std::string ret;
 185   // Ignore the success flag of this call, it will do the best it can for
 186   // invalid input, which is what we want here.
 187   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
 188   return ret;
 189 }
 190
 191 #elif defined(WCHAR_T_IS_UTF16)
 192 // Easy case since we can use the "wide" versions we already wrote above.
 193
 194 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
 195   return UTF8ToWide(src, src_len, output);
 196 }
 197
 198 string16 UTF8ToUTF16(const StringPiece& utf8) {
 199   return UTF8ToWide(utf8);
 200 }
 201
 202 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
 203   return WideToUTF8(src, src_len, output);
 204 }
 205
 206 std::string UTF16ToUTF8(const string16& utf16) {
 207   return WideToUTF8(utf16);
 208 }
 209
 210 #endif
 211
 212 std::wstring ASCIIToWide(const StringPiece& ascii) {
 213   DCHECK(IsStringASCII(ascii)) << ascii;
 214   return std::wstring(ascii.begin(), ascii.end());
 215 }
 216
 217 string16 ASCIIToUTF16(const StringPiece& ascii) {
 218   DCHECK(IsStringASCII(ascii)) << ascii;
 219   return string16(ascii.begin(), ascii.end());
 220 }
 221
 222 std::string UTF16ToASCII(const string16& utf16) {
 223   DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
 224   return std::string(utf16.begin(), utf16.end());
 225 }
 226
 227 }  // namespace base