1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // A streaming validator for UTF-8. Validation follows the definition of UTF-8
6 // in RFC 3629. In particular, it does not reject the characters that
7 // base::IsStringUTF8() additionally treats as invalid.
9 // The implementation detects errors on the first possible byte.
11 #ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
12 #define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_
19 #include "base/i18n/base_i18n_export.h"
// Incrementally validates a byte stream as UTF-8: callers feed chunks through
// AddBytes() and get back the validation state of everything supplied so far.
23 class BASE_I18N_EXPORT StreamingUtf8Validator {
25 // The validator exposes 3 states. It starts in state VALID_ENDPOINT. As it
26 // processes characters it alternates between VALID_ENDPOINT and
27 // VALID_MIDPOINT. If it encounters an invalid byte or UTF-8 sequence the
28 // state changes to INVALID and stays there until the object is reset.
// Constructs a validator in its initial state: the internal state value is 0,
// which is the starting state described above.
35 StreamingUtf8Validator() : state_(0u) {}
37 // This type could be made copyable but there is currently no use-case for
// it, so both the copy constructor and copy assignment are deleted.
39 StreamingUtf8Validator(const StreamingUtf8Validator&) = delete;
40 StreamingUtf8Validator& operator=(const StreamingUtf8Validator&) = delete;
42 // Trivial destructor intentionally omitted.
44 // Validate |size| bytes starting at |data|. If the concatenation of all calls
45 // to AddBytes() since this object was constructed or reset is a valid UTF-8
46 // string, returns VALID_ENDPOINT. If it could be the prefix of a valid UTF-8
47 // string, returns VALID_MIDPOINT. If an invalid byte or UTF-8 sequence was
48 // present, returns INVALID.
// The result is cumulative: it describes every byte supplied since
// construction or the last reset, not only the bytes passed to this call.
49 State AddBytes(const char* data, size_t size);
51 // Return the object to a freshly-constructed state so that it can be re-used.
54 // Validate a complete string using the same criteria. Returns true if the
55 // string only contains complete, valid UTF-8 codepoints.
// Static, so no validator instance is needed for one-shot checks.
56 static bool Validate(const std::string& string);
59 // The current state of the validator. Value 0 is the initial/valid state.
60 // The state is stored as an offset into |kUtf8ValidatorTables|. The special
61 // state |kUtf8InvalidState| is invalid.
67 #endif // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_