if (!ignore_case) return set_replacement(NULL);
// Here, we need to check for characters whose upper and lower cases
// are outside the Latin-1 range.
- // TODO(dcarney): Replace this code with a simple
- // table lookup in unibrow::Latin-1.
- // TODO(dcarney): Test cases!.
- unibrow::uchar result;
- int chars;
- chars = unibrow::ToLowercase::Convert(quarks[j], 0, &result, NULL);
- if (chars > 1 ||
- (chars == 1 && result <= String::kMaxOneByteCharCodeU)) {
- continue;
- }
- chars = unibrow::ToUppercase::Convert(quarks[j], 0, &result, NULL);
- if (chars > 1 ||
- (chars == 1 && result <= String::kMaxOneByteCharCodeU)) {
- continue;
+ if (!unibrow::Latin1::NonLatin1CanBeConvertedToLatin1(quarks[j])) {
+ return set_replacement(NULL);
}
- // This character is definitely not in the Latin-1 range.
- return set_replacement(NULL);
#endif
}
} else {
}
+// Reports whether |c|, a 16-bit code unit above the Latin-1 range, belongs to
+// the fixed set of characters whose upper- or lower-case mapping contains a
+// character inside the Latin-1 range. Callers must only pass values greater
+// than Latin1::kMaxChar; this is asserted.
+bool Latin1::NonLatin1CanBeConvertedToLatin1(uint16_t c) {
+  ASSERT(c > Latin1::kMaxChar);
+  // The two contiguous runs of the set are tested as ranges.
+  if (0x1e96 <= c && c <= 0x1e9a) return true;  // h, t, w, y, a with marks.
+  if (0xfb00 <= c && c <= 0xfb06) return true;  // Ligatures ff .. st.
+  // Everything else in the set is an isolated code point.
+  return c == 0x130 ||   // Capital I with dot above.
+         c == 0x131 ||   // Small dotless i.
+         c == 0x149 ||   // Small n preceded by apostrophe.
+         c == 0x178 ||   // Capital Y with diaeresis.
+         c == 0x17f ||   // Small long s.
+         c == 0x1f0 ||   // Small j with caron.
+         c == 0x1e9e ||  // Capital sharp s.
+         c == 0x212a ||  // Kelvin sign.
+         c == 0x212b;    // Angstrom sign.
+}
+
+
unsigned Utf8::Encode(char* str, uchar c, int previous) {
static const int kMask = ~(1 << 6);
if (c <= kMaxOneByteChar) {
#else
static const unsigned kMaxChar = 0xff;
#endif
+ static inline bool NonLatin1CanBeConvertedToLatin1(uint16_t);
};
class Utf8 {
CHECK(String::IsAscii(static_cast<char*>(NULL), 0));
CHECK(String::IsOneByte(static_cast<uc16*>(NULL), 0));
}
+
+
+// Returns true if any of the first |count| entries of |units| is within the
+// Latin-1 range. A non-positive |count| yields false.
+static bool ContainsLatin1Unit(const uint32_t* units, int count) {
+  for (int i = 0; i < count; i++) {
+    if (units[i] <= unibrow::Latin1::kMaxChar) return true;
+  }
+  return false;
+}
+
+
+// Reference check used by the test below: runs |c| through the unibrow
+// lower-case and upper-case converters and returns true if either result
+// contains a character within the Latin-1 range. |c| must be greater than
+// unibrow::Latin1::kMaxChar.
+static bool CanBeConvertedToLatin1(uint16_t c) {
+  CHECK(c > unibrow::Latin1::kMaxChar);
+  uint32_t result[4];
+  // Capacity in elements. sizeof(result) alone is a byte count (16 here) and
+  // would let a conversion reporting more than 4 units slip past the check,
+  // after which the scan would read beyond the end of the buffer.
+  const int kCapacity = static_cast<int>(sizeof(result) / sizeof(result[0]));
+  int chars = unibrow::ToLowercase::Convert(c, 0, result, NULL);
+  CHECK_LE(chars, kCapacity);
+  if (ContainsLatin1Unit(result, chars)) return true;
+  chars = unibrow::ToUppercase::Convert(c, 0, result, NULL);
+  CHECK_LE(chars, kCapacity);
+  return ContainsLatin1Unit(result, chars);
+}
+
+
+// Exhaustively compares unibrow::Latin1::NonLatin1CanBeConvertedToLatin1
+// with CanBeConvertedToLatin1 for every 16-bit value above
+// unibrow::Latin1::kMaxChar.
+TEST(Latin1) {
+#ifndef ENABLE_LATIN_1
+  // Without ENABLE_LATIN_1 the test body is skipped entirely.
+  // NOTE(review): presumably spelled `if (true)` so the code below is not
+  // flagged as unreachable - confirm.
+  if (true) return;
+#endif
+  const int kFirst = unibrow::Latin1::kMaxChar + 1;
+  const int kLast = 0xffff;  // Largest uint16_t value.
+  for (int code = kFirst; code <= kLast; code++) {
+    const uint16_t c = static_cast<uint16_t>(code);
+    CHECK_EQ(CanBeConvertedToLatin1(c),
+             unibrow::Latin1::NonLatin1CanBeConvertedToLatin1(c));
+  }
+}