1 // Copyright 2020 The Pigweed Authors
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
7 // https://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
15 #include "pw_tokenizer/internal/decode.h"
22 #include "pw_varint/varint.h"
24 namespace pw::tokenizer {
27 // Functions for parsing a printf format specifier.
// Scans printf flag characters at the start of str. The caller adds the
// returned size_t to its parse index (see ParseFormatSpec).
//
// The visible part of the loop condition accepts '-', '+', '#' and ' '.
// NOTE(review): the condition continues on a line that is not part of this
// excerpt, and the loop body and return statement are not shown either, so
// the complete flag set cannot be confirmed from here.
28 size_t SkipFlags(const char* str) {
30 while (str[i] == '-' || str[i] == '+' || str[i] == '#' || str[i] == ' ' ||
// Scans a printf field width or precision at the start of str. The caller adds
// the returned size_t to its parse index (see ParseFormatSpec).
// NOTE(review): the asterisk handling implied by the name is not visible in
// this excerpt; only the integer path is shown below.
37 size_t SkipAsteriskOrInteger(const char* str) {
// Start past an optional leading sign character ('-' or '+').
42 size_t i = (str[0] == '-' || str[0] == '+') ? 1 : 0;
// Advance over the run of decimal digits.
// NOTE(review): std::isdigit receives a plain char here. The <cctype>
// functions have undefined behavior for negative values other than EOF, so a
// cast to unsigned char would be safer if non-ASCII bytes can appear.
44 while (std::isdigit(str[i])) {
// Reads a printf length modifier from the start of str and returns it as a
// two-character array. ParseFormatSpec counts the non-'\0' entries of the
// result to know how many characters were consumed.
50 std::array<char, 2> ReadLengthModifier(const char* str) {
51 // Check for ll or hh.
52 if (str[0] == str[1] && (str[0] == 'l' || str[0] == 'h')) {
53 return {str[0], str[1]};
// Otherwise test for a single-character modifier: h, l, j, z, t or L.
// NOTE(review): std::strchr also matches the string terminator, so this test
// is true when str[0] is '\0'. The return for this branch is not visible;
// presumably it yields str[0], which would be indistinguishable from "no
// modifier" in that case -- confirm.
55 if (std::strchr("hljztL", str[0]) != nullptr) {
61 // Returns the error message that is used in place of a decoded arg when an
// error status is set for that argument.
//
// The returned string starts with PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX and
// ends with PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX. In between, a space and
// the status message are appended, followed by a " (" ... ")" group.
// NOTE(review): the statements that append spec and value, and the condition
// guarding the parenthesized group, are not visible in this excerpt.
63 std::string ErrorMessage(ArgStatus status,
64 const std::string_view& spec,
65 const std::string_view& value) {
// The status is tested in priority order: kSkipped, then kMissing, then
// kDecodeError. The message chosen for each of those is not shown here.
67 if (status.HasError(ArgStatus::kSkipped)) {
69 } else if (status.HasError(ArgStatus::kMissing)) {
71 } else if (status.HasError(ArgStatus::kDecodeError)) {
// Fallback message when none of the statuses tested above is set.
74 message = "INTERNAL ERROR";
77 std::string result(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX);
79 result.push_back(' ');
80 result.append(message);
// Parenthesized group; presumably wraps value -- confirm against full source.
83 result.push_back(' ');
84 result.push_back('(');
86 result.push_back(')');
89 result.append(PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX);
// Constructs a DecodedArg that represents a decoding failure.
//
// value_ is set to the text produced by ErrorMessage(error, spec, value), and
// raw_data_size_bytes_ records raw_size_bytes. FormatString::Format uses
// raw_size_bytes() of each result to advance through the argument buffer.
// NOTE(review): the remaining member initializers and the constructor body
// are not visible in this excerpt.
95 DecodedArg::DecodedArg(ArgStatus error,
96 const std::string_view& spec,
97 size_t raw_size_bytes,
98 const std::string_view& value)
99 : value_(ErrorMessage(error, spec, value)),
101 raw_data_size_bytes_(raw_size_bytes),
// Parses a single printf conversion specification that begins at format.
//
// Returns a default-constructed StringSegment when format does not start with
// '%' or when '%' is the last character, and on the final fallback path near
// the end of the function. Otherwise returns a segment built from the first
// i + 1 characters of format, the detected type, and the vararg size computed
// by VarargSize(length, spec).
//
// Parsing order: flags, field width, optional '.' precision, length modifier,
// conversion specifier.
// NOTE(review): the initialization of i and several type assignments are not
// visible in this excerpt.
104 StringSegment StringSegment::ParseFormatSpec(const char* format) {
105 if (format[0] != '%' || format[1] == '\0') {
106 return StringSegment();
109 // Parse the format specifier.
113 i += SkipFlags(&format[i]);
115 // Skip the field width.
116 i += SkipAsteriskOrInteger(&format[i]);
118 // Skip the precision.
119 if (format[i] == '.') {
121 i += SkipAsteriskOrInteger(&format[i]);
124 // Read the length modifier.
125 const std::array<char, 2> length = ReadLengthModifier(&format[i]);
// Advance by the number of modifier characters found (0, 1 or 2).
126 i += (length[0] == '\0' ? 0 : 1) + (length[1] == '\0' ? 0 : 1);
128 // Read the conversion specifier.
// NOTE(review): spec can be '\0' here if the string ends after the flags,
// width, precision or length modifier (e.g. "%5" or "%l"). std::strchr
// matches the terminator, so the "oxXup" test below would be true for '\0'
// and the returned text (i + 1 characters) would include the terminator.
// Confirm whether a guard exists on a line not shown in this excerpt.
129 const char spec = format[i];
134 } else if (spec == 'c' || spec == 'd' || spec == 'i') {
136 } else if (std::strchr("oxXup", spec) != nullptr) {
137 // The source size matters for unsigned integers because they need to be
138 // masked off to their correct length, since zig-zag decode sign extends.
139 // TODO(hepler): 64-bit targets likely have 64-bit l, j, z, and t. Also, p
140 // needs to be 64-bit on these targets.
// 64-bit is selected for the 'j' modifier or when the second modifier
// character is 'l' (i.e. "ll"); everything else is treated as 32-bit.
141 type = length[0] == 'j' || length[1] == 'l' ? kUnsigned64 : kUnsigned32;
142 } else if (std::strchr("fFeEaAgG", spec) != nullptr) {
143 type = kFloatingPoint;
// A '%' specifier is only accepted at index 1, i.e. directly after the
// leading '%' (presumably i starts at 1, so nothing else was consumed).
144 } else if (spec == '%' && i == 1) {
147 return StringSegment();
150 return {std::string_view(format, i + 1), type, VarargSize(length, spec)};
// Selects the ArgSize for an argument from its length modifier and conversion
// specifier by delegating to the VarargSize<T>() template for a matching C
// type:
//   specifier not one of c, d, i, o, x, X, u  -> void*
//   "l"  -> long,  "ll" -> long long
//   "j"  -> intmax_t
//   "z"  -> size_t
//   "t"  -> ptrdiff_t
//   any other modifier, or none               -> int
// NOTE(review): the second parameter (spec) is declared on a line that is not
// part of this excerpt.
153 StringSegment::ArgSize StringSegment::VarargSize(std::array<char, 2> length,
155 // Use pointer size for %p or any other type (for which this doesn't matter).
156 if (std::strchr("cdioxXu", spec) == nullptr) {
157 return VarargSize<void*>();
159 if (length[0] == 'l') {
160 return length[1] == 'l' ? VarargSize<long long>() : VarargSize<long>();
162 if (length[0] == 'j') {
163 return VarargSize<intmax_t>();
165 if (length[0] == 'z') {
166 return VarargSize<size_t>();
168 if (length[0] == 't') {
169 return VarargSize<ptrdiff_t>();
// Fallthrough for all remaining modifiers (including no modifier).
171 return VarargSize<int>();
// Decodes a string argument from the front of arguments.
//
// Layout as read by this code: the first byte holds the string length in its
// low 7 bits and a "truncated" marker in its high bit (0x80); the string bytes
// follow. On success the result reports 1 + size raw bytes consumed.
//
// Returns a kMissing error when arguments is empty. When fewer bytes remain
// than the length byte claims, kDecodeError is added to the status.
// NOTE(review): the full statement that builds the kDecodeError result is only
// partially visible in this excerpt.
174 DecodedArg StringSegment::DecodeString(
175 const std::span<const uint8_t>& arguments) const {
176 if (arguments.empty()) {
177 return DecodedArg(ArgStatus::kMissing, text_);
// High bit of the length byte clear -> kOk, set -> kTruncated.
181 (arguments[0] & 0x80u) == 0u ? ArgStatus::kOk : ArgStatus::kTruncated;
183 const uint_fast8_t size = arguments[0] & 0x7Fu;
// arguments.size() - 1 is the number of bytes available after the length byte.
185 if (arguments.size() - 1 < size) {
186 status.Update(ArgStatus::kDecodeError);
// The error path passes along whatever bytes are actually present.
191 {reinterpret_cast<const char*>(&arguments[1]), arguments.size() - 1});
// NOTE(review): when arguments holds exactly one byte and size is 0, this
// still evaluates &arguments[1], which indexes one past the span's last
// element. Consider arguments.data() + 1 instead.
194 std::string value(reinterpret_cast<const char*>(&arguments[1]), size);
// Mark strings that the encoder flagged as truncated.
196 if (status.HasError(ArgStatus::kTruncated)) {
197 value.append("[...]");
200 return DecodedArg::FromValue(text_.c_str(), value.c_str(), 1 + size, status);
// Decodes a varint-encoded integer argument from the front of arguments.
//
// Returns a kMissing error when arguments is empty. Otherwise the value is
// decoded with varint::Decode, whose return value (bytes) is reported as the
// number of raw bytes consumed. A kDecodeError result is also constructed on a
// failure path, with a size capped at
// min(varint::kMaxVarint64SizeBytes, arguments.size()).
// NOTE(review): the declaration of value and the condition that selects the
// kDecodeError path are not visible in this excerpt.
203 DecodedArg StringSegment::DecodeInteger(
204 const std::span<const uint8_t>& arguments) const {
205 if (arguments.empty()) {
206 return DecodedArg(ArgStatus::kMissing, text_);
210 const size_t bytes = varint::Decode(std::as_bytes(arguments), &value);
214 ArgStatus::kDecodeError,
// Never report more consumed bytes than the buffer actually contains.
216 std::min(varint::kMaxVarint64SizeBytes, arguments.size()));
219 // Unsigned ints need to be masked to their bit width due to sign extension.
220 if (type_ == kUnsigned32) {
221 value &= 0xFFFFFFFFu;
// When local_size_ is k32Bit the value is narrowed to uint32_t before being
// formatted; otherwise it is passed through at its decoded width.
224 if (local_size_ == k32Bit) {
225 return DecodedArg::FromValue(
226 text_.c_str(), static_cast<uint32_t>(value), bytes);
228 return DecodedArg::FromValue(text_.c_str(), value, bytes);
// Decodes a floating point argument from the front of arguments.
//
// Returns a kMissing error when fewer than sizeof(float) (asserted to be 4)
// bytes are available. Otherwise sizeof(value) bytes are copied verbatim into
// value with std::memcpy, so they are interpreted in the host's byte order,
// and sizeof(value) is reported as the number of raw bytes consumed.
// NOTE(review): the declaration of value is not visible in this excerpt;
// presumably it is a float, given the size check -- confirm.
231 DecodedArg StringSegment::DecodeFloatingPoint(
232 const std::span<const uint8_t>& arguments) const {
233 static_assert(sizeof(float) == 4u);
234 if (arguments.size() < sizeof(float)) {
235 return DecodedArg(ArgStatus::kMissing, text_);
239 std::memcpy(&value, arguments.data(), sizeof(value));
240 return DecodedArg::FromValue(text_.c_str(), value, sizeof(value));
// Produces the DecodedArg for this segment, reading encoded data from
// arguments when the segment is a format specifier.
//
// Visible outcomes: the segment text as-is, a literal "%", or the result of
// DecodeString / DecodeInteger / DecodeFloatingPoint. If none of those paths
// returns, a kDecodeError result is produced.
// NOTE(review): the dispatch conditions (presumably a switch on type_) are not
// visible in this excerpt -- confirm which type maps to which return.
243 DecodedArg StringSegment::Decode(
244 const std::span<const uint8_t>& arguments) const {
247 return DecodedArg(text_);
249 return DecodedArg("%");
251 return DecodeString(arguments);
255 return DecodeInteger(arguments);
257 return DecodeFloatingPoint(arguments);
// Fallback when no earlier return was taken.
260 return DecodedArg(ArgStatus::kDecodeError, text_);
// Produces the DecodedArg for this segment without reading any argument data.
// FormatString::Format uses this for segments that follow a decoding error.
//
// Visible outcomes: the segment text as-is, a literal "%", or a kSkipped error
// result carrying the segment text.
// NOTE(review): the conditions selecting the first two returns are not visible
// in this excerpt.
263 DecodedArg StringSegment::Skip() const {
266 return DecodedArg(text_);
268 return DecodedArg("%");
270 return DecodedArg(ArgStatus::kSkipped, text_);
// Builds the formatted string by appending every segment in order. Segments
// that decoded successfully contribute their value; segments that did not
// contribute their original format specifier text instead.
// NOTE(review): the declaration and return of output are not visible in this
// excerpt.
274 std::string DecodedFormatString::value() const {
277 for (const DecodedArg& arg : segments_) {
278 output.append(arg.ok() ? arg.value() : arg.spec());
// Builds the formatted string by appending the value of every segment in
// order, whether or not it decoded successfully. For failed segments the value
// is the error text set by the DecodedArg error constructor.
// NOTE(review): the declaration and return of output are not visible in this
// excerpt.
284 std::string DecodedFormatString::value_with_errors() const {
287 for (const DecodedArg& arg : segments_) {
288 output.append(arg.value());
// Returns the number of segments whose spec() is non-empty.
// NOTE(review): presumably plain-text segments have an empty spec, so this
// counts only format-specifier segments -- confirm against DecodedArg.
294 size_t DecodedFormatString::argument_count() const {
295 return std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) {
296 return !arg.spec().empty();
// Returns the number of segments that satisfy the counting predicate.
// NOTE(review): the predicate body is not visible in this excerpt; given the
// function name it presumably selects segments that are not ok() -- confirm.
300 size_t DecodedFormatString::decoding_errors() const {
301 return std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) {
// Splits a printf-style format string into segments_: runs of plain text and
// individual format specifiers, in their original order.
//
// format is walked until its terminator. At each position ParseFormatSpec is
// tried; when a specifier is found, any plain text accumulated since
// text_start is emitted first, then the specifier itself.
// NOTE(review): the condition completing the if-with-initializer, the update
// of text_start, and the branch that advances over ordinary characters are not
// visible in this excerpt.
306 FormatString::FormatString(const char* format) {
// Start of the plain-text run that has not yet been emitted as a segment.
307 const char* text_start = format;
309 while (format[0] != '\0') {
310 if (StringSegment spec = StringSegment::ParseFormatSpec(format);
312 // Add the text segment seen so far (if any).
313 if (text_start < format) {
314 segments_.emplace_back(
315 std::string_view(text_start, format - text_start));
318 // Move along the index and text segment start.
319 format += spec.text().size();
322 // Add the format specifier that was just found.
323 segments_.push_back(std::move(spec));
// Emit any plain text left over after the last specifier.
329 if (text_start < format) {
330 segments_.emplace_back(std::string_view(text_start, format - text_start));
// Decodes arguments against this format string's segments.
//
// Each segment is either decoded from the front of arguments or skipped. After
// a decode, arguments is advanced by the number of raw bytes that result
// reports. The returned DecodedFormatString receives the per-segment results
// and the count of argument bytes left unconsumed.
// NOTE(review): the flag that switches from decoding to skipping after the
// first error is not visible in this excerpt.
334 DecodedFormatString FormatString::Format(
335 std::span<const uint8_t> arguments) const {
336 std::vector<DecodedArg> results;
339 for (const auto& segment : segments_) {
341 results.push_back(segment.Skip());
343 results.push_back(segment.Decode(arguments));
// NOTE(review): std::span::subspan requires the offset to be <= size(), so
// this relies on every decoder reporting at most arguments.size() bytes.
344 arguments = arguments.subspan(results.back().raw_size_bytes());
346 // If an error occurred, skip decoding the remaining arguments.
347 if (!results.back().ok()) {
353 return DecodedFormatString(std::move(results), arguments.size());
356 } // namespace pw::tokenizer