1 /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying
2 file Copyright.txt or https://cmake.org/licensing for details. */
3 #include "cm_codecvt.hxx"
11 # include "cmsys/Encoding.hxx"
// Construct a converter for the requested Encoding `e`.  In the cases shown
// here, ConsoleOutput records the console's current output code page in
// m_codepage, and the path following UTF8_WITH_BOM sets m_noconv so that no
// conversion is performed.
16 codecvt::codecvt(Encoding e)
22 case codecvt::ConsoleOutput:
// Target whatever code page the console currently uses for output.
25 m_codepage = GetConsoleOutputCP();
34 // We don't know which ANSI encoding to use on platforms other than
35 // Windows, so we don't do any conversion there.
37 case codecvt::UTF8_WITH_BOM:
38 // Assume internal encoding is UTF-8
// Mark this facet as non-converting; do_always_noconv() reports this flag.
42 this->m_noconv = true;
// Nothing is released by hand here; the compiler-generated destructor is used.
46 codecvt::~codecvt() = default;
// Report whether this facet never converts.  Returns the m_noconv flag, which
// is the same flag do_unshift() checks before doing any work.
48 bool codecvt::do_always_noconv() const noexcept
50 return this->m_noconv;
// Convert the bytes in [from, from_end) for output, writing to [to, to_end).
//
// Returns noconv when no conversion is performed.  On the converting path the
// input is handled as UTF-8, one byte at a time: a byte that does not yet
// complete a codepoint is saved in `state` by BufferPartial(), and the byte
// that completes a codepoint is handed to Decode().  A malformed sequence
// yields error, a non-ok result from Decode() is returned unchanged, and ok is
// returned once from_next reaches from_end.
53 std::codecvt_base::result codecvt::do_out(mbstate_t& state, const char* from,
55 const char*& from_next, char* to,
56 char* to_end, char*& to_next) const
61 return std::codecvt_base::noconv;
64 // Use a const view of the state because we should not modify it until we
65 // have fully processed and consumed a byte (with sufficient space in the
66 // output buffer). We call helpers to re-cast and modify the state.
67 State const& lstate = reinterpret_cast<State&>(state);
69 while (from_next != from_end) {
70 // Count leading ones in the bits of the next byte.
71 unsigned char const ones =
72 cm_utf8_ones[static_cast<unsigned char>(*from_next)];
// Only a continuation byte (exactly one leading 1 bit) may follow buffered
// bytes, and a continuation byte is only valid when bytes are buffered.
74 if (ones != 1 && lstate.buffered != 0) {
75 // We have a buffered partial codepoint that we never completed.
76 return std::codecvt_base::error;
77 } else if (ones == 1 && lstate.buffered == 0) {
78 // This is a continuation of a codepoint that never started.
79 return std::codecvt_base::error;
82 // Compute the number of bytes in the current codepoint.
85 case 0: // 0xxx xxxx: new codepoint of size 1
88 case 1: // 10xx xxxx: continues a codepoint
// A continuation byte relies on the size recorded in the state.
89 assert(lstate.size != 0);
92 case 2: // 110x xxxx: new codepoint of size 2
95 case 3: // 1110 xxxx: new codepoint of size 3
98 case 4: // 1111 0xxx: new codepoint of size 4
101 default: // invalid byte
102 return std::codecvt_base::error;
// Counting the current byte, we hold buffered + 1 bytes of this codepoint.
106 if (lstate.buffered + 1 == need) {
107 // This byte completes a codepoint.
108 std::codecvt_base::result decode_result =
109 this->Decode(state, need, from_next, to_next, to_end);
// Propagate any non-ok result from Decode() to the caller unchanged.
110 if (decode_result != std::codecvt_base::ok) {
111 return decode_result;
114 // This byte does not complete a codepoint.
115 this->BufferPartial(state, need, from_next);
119 return std::codecvt_base::ok;
// Alternate path: every argument is explicitly discarded and nothing is
// converted.  NOTE(review): presumably the non-Windows build of this
// function -- confirm against the enclosing preprocessor guard.
121 static_cast<void>(state);
122 static_cast<void>(from);
123 static_cast<void>(from_end);
124 static_cast<void>(from_next);
125 static_cast<void>(to);
126 static_cast<void>(to_end);
127 static_cast<void>(to_next);
128 return std::codecvt_base::noconv;
// Flush whatever is still pending in `state` at the end of output.
//
// Returns noconv when m_noconv is set.  Otherwise, if bytes of an incomplete
// codepoint are still buffered, returns the result of DecodePartial(); with
// nothing buffered it returns ok.
132 std::codecvt_base::result codecvt::do_unshift(mbstate_t& state, char* to,
134 char*& to_next) const
137 if (this->m_noconv) {
138 return std::codecvt_base::noconv;
// View the opaque mbstate_t as our own State record.
141 State& lstate = reinterpret_cast<State&>(state);
142 if (lstate.buffered != 0) {
143 return this->DecodePartial(state, to_next, to_end);
145 return std::codecvt_base::ok;
// Alternate path: the arguments are explicitly discarded and ok is returned.
// NOTE(review): presumably the non-Windows build -- confirm against the
// enclosing preprocessor guard.
147 static_cast<void>(state);
148 static_cast<void>(to_end);
149 return std::codecvt_base::ok;
// Decode one complete codepoint and write it to the output buffer.
//
// The bytes already buffered in `state` are joined with the byte at
// *from_next, converted from UTF-8 to wide characters with
// MultiByteToWideChar(), and then converted to m_codepage with
// WideCharToMultiByte() directly into [to_next, to_end).
//
// Returns partial when GetLastError() reports ERROR_INSUFFICIENT_BUFFER,
// error on the other failure paths, and ok on success.
154 std::codecvt_base::result codecvt::Decode(mbstate_t& state, int size,
155 const char*& from_next,
156 char*& to_next, char* to_end) const
// View the opaque mbstate_t as our own State record.
158 State& lstate = reinterpret_cast<State&>(state);
160 // Collect all the bytes for this codepoint.
162 memcpy(buf, lstate.partial, lstate.buffered);
// The current input byte goes right after the previously buffered ones.
163 buf[lstate.buffered] = *from_next;
165 // Convert the encoding.
// MB_ERR_INVALID_CHARS makes the call fail on invalid UTF-8 input.  The
// output is limited to two wide characters.
168 MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buf, size, wbuf, 2);
170 return std::codecvt_base::error;
// Write the converted bytes straight into the caller's output buffer, limited
// to the space that remains before to_end.
173 int tlen = WideCharToMultiByte(m_codepage, 0, wbuf, wlen, to_next,
174 to_end - to_next, NULL, NULL);
// Running out of output space is reported as partial rather than error.
176 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
177 return std::codecvt_base::partial;
179 return std::codecvt_base::error;
182 // Move past the now-consumed byte in the input buffer.
185 // Move past the converted codepoint in the output buffer.
188 // Re-initialize the state for the next codepoint to start.
191 return std::codecvt_base::ok;
// Try to convert the incomplete codepoint still buffered in `state`; called
// from do_unshift() when buffered bytes remain.
//
// Only the buffered bytes are converted: first from UTF-8 to wide characters
// with MultiByteToWideChar(), then to m_codepage with WideCharToMultiByte().
//
// Returns partial when GetLastError() reports ERROR_INSUFFICIENT_BUFFER,
// error on the other failure paths, and ok on success.
194 std::codecvt_base::result codecvt::DecodePartial(mbstate_t& state,
// View the opaque mbstate_t as our own State record.
198 State& lstate = reinterpret_cast<State&>(state);
200 // Try converting the partial codepoint.
// MB_ERR_INVALID_CHARS makes the call fail on invalid UTF-8 input.
202 int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, lstate.partial,
203 lstate.buffered, wbuf, 2);
205 return std::codecvt_base::error;
// Write the converted bytes straight into the caller's output buffer, limited
// to the space that remains before to_end.
208 int tlen = WideCharToMultiByte(m_codepage, 0, wbuf, wlen, to_next,
209 to_end - to_next, NULL, NULL);
// Running out of output space is reported as partial rather than error.
211 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
212 return std::codecvt_base::partial;
214 return std::codecvt_base::error;
217 // Move past the converted codepoint in the output buffer.
220 // Re-initialize the state for the next codepoint to start.
223 return std::codecvt_base::ok;
// Store the byte at *from_next in the partial-codepoint buffer kept in
// `state`; do_out() calls this for a byte that does not yet complete a
// codepoint.
226 void codecvt::BufferPartial(mbstate_t& state, int size,
227 const char*& from_next) const
// View the opaque mbstate_t as our own State record.
229 State& lstate = reinterpret_cast<State&>(state);
231 // Save the byte in our buffer for later.
// The post-increment also bumps the count of buffered bytes.
232 lstate.partial[lstate.buffered++] = *from_next;
235 // Move past the now-consumed byte in the input buffer.
240 int codecvt::do_max_length() const noexcept
245 int codecvt::do_encoding() const noexcept