return elem1.parameter < elem2.parameter;
}
+// MachineWord is an unsigned integer as wide as a pointer; this assumes that a
+// pointer is the size of a "machine word", so uintptr_t is a machine word too.
+typedef uintptr_t MachineWord;
+const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;  // Low address bits; all zero for a word-aligned address (assumes the word size is a power of two).
+
+inline bool IsAlignedToMachineWord(const void* pointer) {  // True when none of the low alignment-mask bits of |pointer|'s address are set.
+ return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);  // The address is viewed as an integer so it can be masked.
+}
+
+template<typename T> inline T* AlignToMachineWord(T* pointer) {  // Returns |pointer| with the low alignment-mask bits of its address cleared.
+ return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
+ ~kMachineWordAlignmentMask);  // Clearing bits can only lower the address, so the result is at or below |pointer|.
+}
+
+template<size_t size, typename CharacterType> struct NonASCIIMask;  // Declared only; each specialization is keyed by word size in bytes and character type, and value() returns a word-wide mask.
+template<> struct NonASCIIMask<4, base::char16> {
+ static inline uint32_t value() { return 0xFF80FF80U; }  // 0xFF80 repeated per 16 bits: every bit above the low seven.
+};
+template<> struct NonASCIIMask<4, char> {
+ static inline uint32_t value() { return 0x80808080U; }  // 0x80 repeated per byte: the top bit of each byte.
+};
+template<> struct NonASCIIMask<8, base::char16> {
+ static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }  // Same 16-bit pattern, repeated across 64 bits.
+};
+template<> struct NonASCIIMask<8, char> {
+ static inline uint64_t value() { return 0x8080808080808080ULL; }  // Same per-byte pattern, repeated across 64 bits.
+};
+#if defined(WCHAR_T_IS_UTF32)
+template<> struct NonASCIIMask<4, wchar_t> {  // wchar_t masks exist only when WCHAR_T_IS_UTF32 is defined.
+ static inline uint32_t value() { return 0xFFFFFF80U; }  // One 32-bit unit: every bit above the low seven.
+};
+template<> struct NonASCIIMask<8, wchar_t> {
+ static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }  // Same 32-bit pattern, repeated across 64 bits.
+};
+#endif // WCHAR_T_IS_UTF32
+
} // namespace
namespace base {
return input.find_first_not_of(characters) == StringPiece16::npos;
}
-template<class STR>
-static bool DoIsStringASCII(const STR& str) {
- for (size_t i = 0; i < str.length(); i++) {
- typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
- if (c > 0x7F)
- return false;
+template <class Char>
+inline bool DoIsStringASCII(const Char* characters, size_t length) {  // True when none of the |length| characters sets a bit covered by NonASCIIMask; an empty range yields true.
+ MachineWord all_char_bits = 0;  // Bitwise OR of everything read; tested against the mask once at the end, so there is no early exit.
+ const Char* end = characters + length;
+
+ // Prologue: consume single characters until the pointer is word-aligned or the input runs out.
+ while (!IsAlignedToMachineWord(characters) && characters != end) {
+ all_char_bits |= *characters;
+ ++characters;
}
- return true;
+
+ // Main loop: OR in one whole machine word at a time, up to the last word boundary at or before |end|.
+ const Char* word_end = AlignToMachineWord(end);
+ const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);  // Number of characters that fit in one machine word.
+ while (characters < word_end) {
+ all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));  // NOTE(review): reads Char storage through a MachineWord pointer; confirm this type-punning is acceptable under the build's aliasing rules.
+ characters += loop_increment;
+ }
+
+ // Epilogue: handle the characters left over after the last whole word.
+ while (characters != end) {
+ all_char_bits |= *characters;
+ ++characters;
+ }
+
+ MachineWord non_ascii_bit_mask =
+ NonASCIIMask<sizeof(MachineWord), Char>::value();  // Mask chosen at compile time from the word size and Char.
+ return !(all_char_bits & non_ascii_bit_mask);  // Any overlap means a character outside 0x00-0x7F was seen.
}
bool IsStringASCII(const StringPiece& str) {
- return DoIsStringASCII(str);
+ return DoIsStringASCII(str.data(), str.length());  // Forwards the underlying buffer and its length to DoIsStringASCII.
+}
+
+bool IsStringASCII(const StringPiece16& str) {
+ return DoIsStringASCII(str.data(), str.length());  // Same forwarding for the StringPiece16 overload.
}
bool IsStringASCII(const string16& str) {
- return DoIsStringASCII(str);
+ return DoIsStringASCII(str.data(), str.length());  // Same forwarding for the string16 overload.
+}
+
+#if defined(WCHAR_T_IS_UTF32)
+bool IsStringASCII(const std::wstring& str) {
+ return DoIsStringASCII(str.data(), str.length());  // Compiled only when WCHAR_T_IS_UTF32 is defined, the same guard as the wchar_t NonASCIIMask specializations.
}
+#endif
bool IsStringUTF8(const std::string& str) {
const char *src = str.data();