2 * Copyright (c) 2011 Samsung Electronics Co., Ltd All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * @author Piotr Marcinkiewicz (p.marcinkiew@samsung.com)
19 * @author Przemyslaw Dobrowolski (p.dobrowolsk@samsung.com)
22 #include <dpl/string.h>
23 #include <dpl/char_traits.h>
24 #include <dpl/errno_string.h>
25 #include <dpl/exception.h>
26 #include <dpl/scoped_array.h>
27 #include <dpl/log/log.h>
34 #include <unicode/ustring.h>
36 // TODO: Completely move to ICU
// Members of the ASCIIValidator functor (the class header itself is not
// visible in this excerpt).
//
// Reference to the string being validated. It is stored by reference, not
// copied, and is echoed in the exception message on failure.
43 const std::string& m_TestedString;
// Binds the validator to aTestedString. Because only a reference is kept,
// aTestedString must outlive the validator.
46 ASCIIValidator(const std::string& aTestedString);
// Checks a single character; intended to be applied per character
// (FromASCIIString passes this functor to std::for_each).
48 void operator()(char aCharacter) const;
// Constructor: remembers a reference to the string under test so that it can
// be quoted in the error message. No copy is made, so the caller must keep
// aTestedString alive while the validator is in use.
51 ASCIIValidator::ASCIIValidator(const std::string& aTestedString)
52 : m_TestedString(aTestedString)
// Validates one character of the tested string.
//
// @param aCharacter character to check
// @throws StringException::InvalidASCIICharacter (via ThrowMsg) when the
//         range check fails; the message contains the numeric code of the
//         offending character and the whole tested string.
// NOTE(review): the line holding the actual range condition is not visible in
// this excerpt - confirm the exact bounds against the full file.
56 void ASCIIValidator::operator()(char aCharacter) const
58 // Check for ASCII data range
61 ThrowMsg(StringException::InvalidASCIICharacter,
// The cast to int makes the stream print the character code, not the glyph.
62 "invalid character code " << static_cast<int>(aCharacter)
63 << " from string [" << m_TestedString
64 << "] passed as ASCII");
// Value compared against the handle returned by iconv_open() to detect that
// opening the conversion descriptor failed, i.e. (iconv_t)-1.
68 const iconv_t gc_IconvOperError = reinterpret_cast<iconv_t>(-1);
// Value compared against the result of iconv() to detect a failed conversion,
// i.e. (size_t)-1.
69 const size_t gc_IconvConvertError = static_cast<size_t>(-1);
70 } // namespace anonymous
// Converts a UTF-8 encoded std::string into a DPL::String using iconv
// ("UTF-32" <- "UTF-8").
//
// @param aIn UTF-8 encoded input
// @return the converted string (the return statement is not visible in this
//         excerpt; per the trailing comment the leading BOM is skipped)
// @throws StringException::IconvInitErrorUTF8ToUTF32 when iconv_open() fails
// @throws StringException::IconvConvertErrorUTF8ToUTF32 when iconv() fails
// NOTE(review): the output buffer is a wchar_t vector handed to iconv as
// UTF-32, so this presumably assumes sizeof(wchar_t) == 4 - confirm for all
// supported targets.
72 String FromUTF8String(const std::string& aIn)
// iconv() decrements this as input is consumed.
78 size_t inbytes = aIn.size();
80 // Default iconv UTF-32 module adds BOM (4 bytes) in front of string
81 // The worst case is when 8bit UTF-8 char converts to 32bit UTF-32
82 // newsize = oldsize * 4 + end + bom
83 // newsize - bytes for UTF-32 string
84 // oldsize - letters in UTF-8 string
85 // end - end character for UTF-32 (\0)
86 // bom - Unicode header in front of string (0xfeff)
// Two extra elements: one for the BOM and one for the terminating zero.
87 size_t outbytes = sizeof(wchar_t)*(inbytes + 2);
// Zero-filled so the result is terminated even though iconv writes no
// terminator itself.
88 std::vector<wchar_t> output(inbytes + 2, 0);
90 size_t outbytesleft = outbytes;
// iconv() takes a non-const char** for its input, hence the const_cast.
91 char* inbuf = const_cast<char*>(aIn.c_str());
93 // vector is used to provide buffer for iconv which expects char* buffer
94 // but during conversion from UTF32 uses internally wchar_t
95 char* outbuf = reinterpret_cast<char*>(&output[0]);
97 iconv_t iconvHandle = iconv_open("UTF-32","UTF-8");
99 if (gc_IconvOperError == iconvHandle)
// `error` is declared on a line not visible in this excerpt (presumably a
// copy of errno taken right after iconv_open - confirm).
// NOTE(review): the literals concatenate to "...UTF-8error: ..." with no
// separator before "error:".
103 ThrowMsg(StringException::IconvInitErrorUTF8ToUTF32,
104 "iconv_open failed for " << "UTF-32 <- UTF-8" <<
105 "error: " << GetErrnoString(error));
108 size_t iconvRet = iconv(iconvHandle, &inbuf, &inbytes, &outbuf, &outbytesleft);
// The descriptor is closed before the result is checked, so it is released
// on both the success and the failure path.
110 iconv_close(iconvHandle);
112 if (gc_IconvConvertError == iconvRet)
// NOTE(review): GetErrnoString() is evaluated after iconv_close(), which may
// itself modify errno; consider capturing errno immediately after iconv().
114 ThrowMsg(StringException::IconvConvertErrorUTF8ToUTF32,
115 "iconv failed for " << "UTF-32 <- UTF-8" << "error: "
116 << GetErrnoString());
120 // Ignore BOM in front of UTF-32
// Converts a DPL::String into a UTF-8 encoded std::string using iconv
// ("UTF-8" <- "UTF-32").
//
// @param aIn string to convert; its wchar_t buffer is handed to iconv as
//            UTF-32 (presumably assumes sizeof(wchar_t) == 4 - confirm)
// @return the UTF-8 encoded text; an early `return std::string()` is visible
//         below (its guarding condition is not part of this excerpt)
// @throws StringException::IconvInitErrorUTF32ToUTF8 when iconv_open() fails
// @throws StringException::IconvConvertErrorUTF32ToUTF8 when iconv() fails
124 std::string ToUTF8String(const DPL::String& aIn)
128 return std::string();
// Input length in bytes; iconv() decrements this as input is consumed.
130 size_t inbytes = aIn.size() * sizeof(wchar_t);
// Capacity reported to iconv: one byte more than the input size.
131 size_t outbytes = inbytes + sizeof(char);
133 // wstring returns wchar_t but iconv expects char*
134 // iconv internally is processing input as wchar_t
135 char* inbuf = reinterpret_cast<char*>(const_cast<wchar_t*>(aIn.c_str()));
// NOTE(review): the vector holds `inbytes` elements, yet iconv is told below
// that `outbytes` (= inbytes + 1) bytes are available. If the conversion ever
// fills the reported capacity, iconv would write one byte past the vector and
// no zero terminator would remain. Consider sizing the vector with
// `outbytes` (or more) - confirm against the full file.
136 std::vector<char> output(inbytes, 0);
137 char* outbuf = &output[0];
139 size_t outbytesleft = outbytes;
141 iconv_t iconvHandle = iconv_open("UTF-8","UTF-32");
143 if (gc_IconvOperError == iconvHandle)
// NOTE(review): the literals concatenate to "...UTF-32error: ..." with no
// separator before "error:".
145 ThrowMsg(StringException::IconvInitErrorUTF32ToUTF8,
146 "iconv_open failed for " << "UTF-8 <- UTF-32"
147 << "error: " << GetErrnoString());
// Presumably unreachable if ThrowMsg always throws - confirm.
148 return std::string();
151 size_t iconvRet = iconv(iconvHandle, &inbuf, &inbytes, &outbuf, &outbytesleft);
// Closed before the result check so the descriptor is released on every path.
153 iconv_close(iconvHandle);
155 if (gc_IconvConvertError == iconvRet)
// NOTE(review): GetErrnoString() runs after iconv_close(), which may itself
// modify errno; consider capturing errno immediately after iconv().
157 ThrowMsg(StringException::IconvConvertErrorUTF32ToUTF8,
158 "iconv failed for " << "UTF-8 <- UTF-32"
159 << "error: " << GetErrnoString());
// Presumably unreachable if ThrowMsg always throws - confirm.
160 return std::string();
// Builds a DPL::String from a string that is expected to contain only ASCII.
//
// @param aString input to validate and widen
// @throws StringException::InvalidASCIICharacter from ASCIIValidator when a
//         character fails the ASCII check
// `output` is declared on a line not visible in this excerpt.
166 String FromASCIIString(const std::string& aString)
// First pass: validate every character; the validator throws on the first
// offending one, before anything is copied.
170 std::for_each(aString.begin(), aString.end(), ASCIIValidator(aString));
// Second pass: append each char to `output`, converting it to the String's
// character type element by element.
171 std::copy(aString.begin(), aString.end(), std::back_inserter<String>(output));
// Creates a DPL::String from a std::wstring that already holds UTF-32 data.
//
// @param aString source wide string
// @return String constructed from a pointer to the first character
// NOTE(review): construction from a bare pointer presumably stops at the first
// L'\0', so any content after an embedded NUL would be dropped - confirm
// whether that is intended.
176 String FromUTF32String(const std::wstring& aString)
178 return String(&aString[0]);
// File-local helper: converts a DPL::String into a newly allocated,
// zero-terminated ICU UChar array.
//
// @param inputString string to convert
// @return pointer released from the internal ScopedArray; ownership passes to
//         the caller (StringCompare wraps it in a ScopedArray again)
// @throws StringException::ICUInvalidCharacterFound when the size query or the
//         conversion itself reports an error
// Several lines (the size query call, the declaration of `size`, and the
// arguments of the final u_strFromWCS call) are not visible in this excerpt.
181 static UChar *ConvertToICU(const String &inputString)
183 ScopedArray<UChar> outputString;
185 int32_t convertedSize = 0;
186 UErrorCode error = U_ZERO_ERROR;
188 // Calculate size of output string
// U_BUFFER_OVERFLOW_ERROR is accepted here alongside U_ZERO_ERROR: both are
// treated as a successful size query.
196 if (error == U_ZERO_ERROR ||
197 error == U_BUFFER_OVERFLOW_ERROR)
199 // What buffer size is ok ?
200 LogPedantic("ICU: Output buffer size: " << size);
// Presumably the else-branch of the check above - confirm against full file.
204 ThrowMsg(StringException::ICUInvalidCharacterFound,
205 "ICU: Failed to retrieve output string size. Error: "
209 // Allocate proper buffer
// One extra element for the terminating zero; the whole buffer is cleared.
210 outputString.Reset(new UChar[size + 1]);
211 ::memset(outputString.Get(), 0, sizeof(UChar) * (size + 1));
// Reset the status before the real conversion, since the size query may have
// left U_BUFFER_OVERFLOW_ERROR in it.
213 error = U_ZERO_ERROR;
216 ::u_strFromWCS(outputString.Get(),
223 if (!U_SUCCESS(error))
225 ThrowMsg(StringException::ICUInvalidCharacterFound,
226 "ICU: Failed to convert string. Error: " << error);
// Hand the buffer over to the caller.
230 return outputString.Release();
// Compares two DPL::Strings using ICU.
//
// @param left            first operand
// @param right           second operand (its declaration line is not visible
//                        in this excerpt)
// @param caseInsensitive presumably selects the u_strcasecmp path below; the
//                        branching line is not visible - confirm
// @return the result of u_strcasecmp / u_strcmp cast to int
// @throws StringException::ICUInvalidCharacterFound propagated from
//         ConvertToICU
233 int StringCompare(const String &left,
235 bool caseInsensitive)
237 // Convert input strings
// Each converted buffer is handed to a ScopedArray, which takes over the
// pointer returned by ConvertToICU.
238 ScopedArray<UChar> leftICU(ConvertToICU(left));
239 ScopedArray<UChar> rightICU(ConvertToICU(right));
// Case-folding comparison; 0 is passed as the options argument.
243 return static_cast<int>(u_strcasecmp(leftICU.Get(), rightICU.Get(), 0));
// Plain comparison of the two converted buffers.
247 return static_cast<int>(u_strcmp(leftICU.Get(), rightICU.Get()));
// Stream insertion for DPL::String: writes the string to a narrow stream as
// UTF-8 by converting it with DPL::ToUTF8String first.
//
// @param aStream destination stream
// @param aString string to write
// @return aStream, to allow chaining
// Any exception raised by ToUTF8String propagates to the caller.
252 std::ostream& operator<<(std::ostream& aStream, const DPL::String& aString)
254 return aStream << DPL::ToUTF8String(aString);