From: Levi Broderick Date: Tue, 24 Sep 2019 22:09:53 +0000 (-0700) Subject: Add more Unicode validation tests (dotnet/corefx#41265) X-Git-Tag: submit/tizen/20210909.063632~11031^2~412 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b791f83402be3f00c66eceeac48baf4235b2182d;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Add more Unicode validation tests (dotnet/corefx#41265) * Add more Unicode validation tests * PR feedback and add more char tests Commit migrated from https://github.com/dotnet/corefx/commit/b59265f6dda8e0462f17b96d85e1356245261de9 --- diff --git a/src/libraries/Common/tests/Data/CaseFolding-12.1.0.txt b/src/libraries/Common/tests/Data/CaseFolding-12.1.0.txt new file mode 100644 index 0000000..7eeb915 --- /dev/null +++ b/src/libraries/Common/tests/Data/CaseFolding-12.1.0.txt @@ -0,0 +1,1581 @@ +# CaseFolding-12.1.0.txt +# Date: 2019-03-10, 10:53:00 GMT +# © 2019 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Case Folding Properties +# +# This file is a supplement to the UnicodeData file. +# It provides a case folding mapping generated from the Unicode Character Database. +# If all characters are mapped according to the full mapping below, then +# case differences (according to UnicodeData.txt and SpecialCasing.txt) +# are eliminated. +# +# The data supports both implementations that require simple case foldings +# (where string lengths don't change), and implementations that allow full case folding +# (where string lengths may grow). Note that where they can be supported, the +# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. +# +# All code points not listed in this file map to themselves. +# +# NOTE: case folding does not preserve normalization formats! +# +# For information on case folding, including how to have case folding +# preserve normalization formats, see Section 3.13 Default Case Algorithms in +# The Unicode Standard. +# +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# ; ; ; # +# +# The status field is: +# C: common case folding, common mappings shared by both simple and full mappings. +# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. +# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# Note that the Turkic mappings do not maintain canonical equivalence without additional processing. +# See the discussions of case mapping in the Unicode Standard for more information. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) +# +# ================================================================= + +# Property: Case_Folding + +# All code points not explicitly listed for Case_Folding +# have the value C for the status field, and the code point itself for the mapping field. + +# ================================================================= +0041; C; 0061; # LATIN CAPITAL LETTER A +0042; C; 0062; # LATIN CAPITAL LETTER B +0043; C; 0063; # LATIN CAPITAL LETTER C +0044; C; 0064; # LATIN CAPITAL LETTER D +0045; C; 0065; # LATIN CAPITAL LETTER E +0046; C; 0066; # LATIN CAPITAL LETTER F +0047; C; 0067; # LATIN CAPITAL LETTER G +0048; C; 0068; # LATIN CAPITAL LETTER H +0049; C; 0069; # LATIN CAPITAL LETTER I +0049; T; 0131; # LATIN CAPITAL LETTER I +004A; C; 006A; # LATIN CAPITAL LETTER J +004B; C; 006B; # LATIN CAPITAL LETTER K +004C; C; 006C; # LATIN CAPITAL LETTER L +004D; C; 006D; # LATIN CAPITAL LETTER M +004E; C; 006E; # LATIN CAPITAL LETTER N +004F; C; 006F; # LATIN CAPITAL LETTER O +0050; C; 0070; # LATIN CAPITAL LETTER P +0051; C; 0071; # LATIN CAPITAL LETTER Q +0052; C; 0072; # LATIN CAPITAL LETTER R +0053; C; 0073; # LATIN CAPITAL LETTER S +0054; C; 0074; # LATIN CAPITAL LETTER T +0055; C; 0075; # LATIN CAPITAL LETTER U +0056; C; 0076; # LATIN CAPITAL LETTER V +0057; C; 0077; # LATIN CAPITAL LETTER W +0058; C; 0078; # LATIN CAPITAL LETTER X +0059; C; 0079; # LATIN CAPITAL LETTER Y +005A; C; 007A; # LATIN CAPITAL LETTER Z +00B5; C; 03BC; # MICRO SIGN +00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE +00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE +00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE +00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS +00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE +00C6; C; 00E6; # LATIN CAPITAL LETTER AE +00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA +00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE +00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE +00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS +00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE +00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE +00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS +00D0; C; 00F0; # LATIN CAPITAL LETTER ETH +00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE +00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE +00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE +00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE +00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS +00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE +00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE +00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE +00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS +00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE +00DE; C; 00FE; # LATIN CAPITAL LETTER THORN +00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S +0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON +0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE +0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK +0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE +0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE +010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON +010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON +0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE +0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON +0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE +0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE +0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK +011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON +011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE +0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE +0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA +0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE +0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE +012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON +012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE +012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0132; C; 0133; # LATIN CAPITAL LIGATURE IJ +0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA +0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE +013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA +013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON +013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE +0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE +0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA +0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON +0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014A; C; 014B; # LATIN CAPITAL LETTER ENG +014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON +014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE +0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152; C; 0153; # LATIN CAPITAL LIGATURE OE +0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE +0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA +0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON +015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE +015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA +0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON +0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA +0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON +0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE +0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE +016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON +016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE +016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE +0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK +0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS +0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE +017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON +017F; C; 0073; # LATIN SMALL LETTER LONG S +0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK +0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR +0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX +0186; C; 0254; # LATIN CAPITAL LETTER OPEN O +0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK +0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D +018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK +018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR +018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E +018F; C; 0259; # LATIN CAPITAL LETTER SCHWA +0190; C; 025B; # LATIN CAPITAL LETTER OPEN E +0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK +0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK +0194; C; 0263; # LATIN CAPITAL LETTER GAMMA +0196; C; 0269; # LATIN CAPITAL LETTER IOTA +0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE +0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK +019C; C; 026F; # LATIN CAPITAL LETTER TURNED M +019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK +019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE +01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN +01A2; C; 01A3; # LATIN CAPITAL LETTER OI +01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK +01A6; C; 0280; # LATIN LETTER YR +01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO +01A9; C; 0283; # LATIN CAPITAL LETTER ESH +01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK +01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN +01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON +01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK +01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK +01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE +01B7; C; 0292; # LATIN CAPITAL LETTER EZH +01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED +01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE +01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON +01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7; C; 01C9; # LATIN CAPITAL LETTER LJ +01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA; C; 01CC; # LATIN CAPITAL LETTER NJ +01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON +01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON +01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON +01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON +01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON +01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE +01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON +01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON +01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK +01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON +01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON +01F1; C; 01F3; # LATIN CAPITAL LETTER DZ +01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE +01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR +01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN +01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE +01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE +01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW +021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW +021C; C; 021D; # LATIN CAPITAL LETTER YOGH +021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON +0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222; C; 0223; # LATIN CAPITAL LETTER OU +0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK +0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE +0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA +022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE +0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON +023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE +023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE +023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR +023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP +0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE +0244; C; 0289; # LATIN CAPITAL LETTER U BAR +0245; C; 028C; # LATIN CAPITAL LETTER TURNED V +0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE +0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE +024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE +024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE +0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI +0370; C; 0371; # GREEK CAPITAL LETTER HETA +0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI +0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F; C; 03F3; # GREEK CAPITAL LETTER YOT +0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS +0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS +0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS +038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS +038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS +038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS +038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS +0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA +0392; C; 03B2; # GREEK CAPITAL LETTER BETA +0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA +0394; C; 03B4; # GREEK CAPITAL LETTER DELTA +0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON +0396; C; 03B6; # GREEK CAPITAL LETTER ZETA +0397; C; 03B7; # GREEK CAPITAL LETTER ETA +0398; C; 03B8; # GREEK CAPITAL LETTER THETA +0399; C; 03B9; # GREEK CAPITAL LETTER IOTA +039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA +039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA +039C; C; 03BC; # GREEK CAPITAL LETTER MU +039D; C; 03BD; # GREEK CAPITAL LETTER NU +039E; C; 03BE; # GREEK CAPITAL LETTER XI +039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON +03A0; C; 03C0; # GREEK CAPITAL LETTER PI +03A1; C; 03C1; # GREEK CAPITAL LETTER RHO +03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA +03A4; C; 03C4; # GREEK CAPITAL LETTER TAU +03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON +03A6; C; 03C6; # GREEK CAPITAL LETTER PHI +03A7; C; 03C7; # GREEK CAPITAL LETTER CHI +03A8; C; 03C8; # GREEK CAPITAL LETTER PSI +03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA +03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA +03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL +03D0; C; 03B2; # GREEK BETA SYMBOL +03D1; C; 03B8; # GREEK THETA SYMBOL +03D5; C; 03C6; # GREEK PHI SYMBOL +03D6; C; 03C0; # GREEK PI SYMBOL +03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA +03DA; C; 03DB; # GREEK LETTER STIGMA +03DC; C; 03DD; # GREEK LETTER DIGAMMA +03DE; C; 03DF; # GREEK LETTER KOPPA +03E0; C; 03E1; # GREEK LETTER SAMPI +03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI +03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI +03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI +03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI +03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA +03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA +03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI +03F0; C; 03BA; # GREEK KAPPA SYMBOL +03F1; C; 03C1; # GREEK RHO SYMBOL +03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL +03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL +03F7; C; 03F8; # GREEK CAPITAL LETTER SHO +03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL +03FA; C; 03FB; # GREEK CAPITAL LETTER SAN +03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL +03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL +03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401; C; 0451; # CYRILLIC CAPITAL LETTER IO +0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE +0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE +0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE +0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0407; C; 0457; # CYRILLIC CAPITAL LETTER YI +0408; C; 0458; # CYRILLIC CAPITAL LETTER JE +0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE +040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE +040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE +040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE +040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE +040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U +040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE +0410; C; 0430; # CYRILLIC CAPITAL LETTER A +0411; C; 0431; # CYRILLIC CAPITAL LETTER BE +0412; C; 0432; # CYRILLIC CAPITAL LETTER VE +0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE +0414; C; 0434; # CYRILLIC CAPITAL LETTER DE +0415; C; 0435; # CYRILLIC CAPITAL LETTER IE +0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE +0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE +0418; C; 0438; # CYRILLIC CAPITAL LETTER I +0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I +041A; C; 043A; # CYRILLIC CAPITAL LETTER KA +041B; C; 043B; # CYRILLIC CAPITAL LETTER EL +041C; C; 043C; # CYRILLIC CAPITAL LETTER EM +041D; C; 043D; # CYRILLIC CAPITAL LETTER EN +041E; C; 043E; # CYRILLIC CAPITAL LETTER O +041F; C; 043F; # CYRILLIC CAPITAL LETTER PE +0420; C; 0440; # CYRILLIC CAPITAL LETTER ER +0421; C; 0441; # CYRILLIC CAPITAL LETTER ES +0422; C; 0442; # CYRILLIC CAPITAL LETTER TE +0423; C; 0443; # CYRILLIC CAPITAL LETTER U +0424; C; 0444; # CYRILLIC CAPITAL LETTER EF +0425; C; 0445; # CYRILLIC CAPITAL LETTER HA +0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE +0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE +0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA +0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA +042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN +042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU +042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN +042D; C; 044D; # CYRILLIC CAPITAL LETTER E +042E; C; 044E; # CYRILLIC CAPITAL LETTER YU +042F; C; 044F; # CYRILLIC CAPITAL LETTER YA +0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA +0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT +0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E +0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS +0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS +046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI +0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI +0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA +0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA +0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478; C; 0479; # CYRILLIC CAPITAL LETTER UK +047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA +047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E; C; 047F; # CYRILLIC CAPITAL LETTER OT +0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA +048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK +0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA +04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE +04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U +04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE +04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA +04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA +04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE +04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE +04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA +04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON +04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O +04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON +04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE +0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE +0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE +0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE +0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE +0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE +050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE +050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE +050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE +0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE +0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK +0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA +0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA +0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE +051A; C; 051B; # CYRILLIC CAPITAL LETTER QA +051C; C; 051D; # CYRILLIC CAPITAL LETTER WE +051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA +0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE +052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE +052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB +0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN +0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM +0534; C; 0564; # ARMENIAN CAPITAL LETTER DA +0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH +0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA +0537; C; 0567; # ARMENIAN CAPITAL LETTER EH +0538; C; 0568; # ARMENIAN CAPITAL LETTER ET +0539; C; 0569; # ARMENIAN CAPITAL LETTER TO +053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE +053B; C; 056B; # ARMENIAN CAPITAL LETTER INI +053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN +053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH +053E; C; 056E; # ARMENIAN CAPITAL LETTER CA +053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN +0540; C; 0570; # ARMENIAN CAPITAL LETTER HO +0541; C; 0571; # ARMENIAN CAPITAL LETTER JA +0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD +0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH +0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN +0545; C; 0575; # ARMENIAN CAPITAL LETTER YI +0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW +0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA +0548; C; 0578; # ARMENIAN CAPITAL LETTER VO +0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA +054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH +054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH +054C; C; 057C; # ARMENIAN CAPITAL LETTER RA +054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH +054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW +054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN +0550; C; 0580; # ARMENIAN CAPITAL LETTER REH +0551; C; 0581; # ARMENIAN CAPITAL LETTER CO +0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN +0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR +0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH +0555; C; 0585; # ARMENIAN CAPITAL LETTER OH +0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH +0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN +10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN +10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN +10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN +10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON +10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN +10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN +10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN +10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN +10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN +10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN +10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS +10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN +10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR +10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON +10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR +10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR +10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE +10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN +10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR +10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN +10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR +10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR +10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN +10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR +10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN +10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN +10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN +10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL +10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL +10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR +10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN +10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN +10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE +10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE +10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE +10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE +10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR +10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE +10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN +10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN +13F8; C; 13F0; # CHEROKEE SMALL LETTER YE +13F9; C; 13F1; # CHEROKEE SMALL LETTER YI +13FA; C; 13F2; # CHEROKEE SMALL LETTER YO +13FB; C; 13F3; # CHEROKEE SMALL LETTER YU +13FC; C; 13F4; # CHEROKEE SMALL LETTER YV +13FD; C; 13F5; # CHEROKEE SMALL LETTER MV +1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE +1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE +1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O +1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES +1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE +1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE +1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN +1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT +1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK +1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN +1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN +1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN +1C93; C; 10D3; # GEORGIAN MTAVRULI CAPITAL LETTER DON +1C94; C; 10D4; # GEORGIAN MTAVRULI CAPITAL LETTER EN +1C95; C; 10D5; # GEORGIAN MTAVRULI CAPITAL LETTER VIN +1C96; C; 10D6; # GEORGIAN MTAVRULI CAPITAL LETTER ZEN +1C97; C; 10D7; # GEORGIAN MTAVRULI CAPITAL LETTER TAN +1C98; C; 10D8; # GEORGIAN MTAVRULI CAPITAL LETTER IN +1C99; C; 10D9; # GEORGIAN MTAVRULI CAPITAL LETTER KAN +1C9A; C; 10DA; # GEORGIAN MTAVRULI CAPITAL LETTER LAS +1C9B; C; 10DB; # GEORGIAN MTAVRULI CAPITAL LETTER MAN +1C9C; C; 10DC; # GEORGIAN MTAVRULI CAPITAL LETTER NAR +1C9D; C; 10DD; # GEORGIAN MTAVRULI CAPITAL LETTER ON +1C9E; C; 10DE; # GEORGIAN MTAVRULI CAPITAL LETTER PAR +1C9F; C; 10DF; # GEORGIAN MTAVRULI CAPITAL LETTER ZHAR +1CA0; C; 10E0; # GEORGIAN MTAVRULI CAPITAL LETTER RAE +1CA1; C; 10E1; # GEORGIAN MTAVRULI CAPITAL LETTER SAN +1CA2; C; 10E2; # GEORGIAN MTAVRULI CAPITAL LETTER TAR +1CA3; C; 10E3; # GEORGIAN MTAVRULI CAPITAL LETTER UN +1CA4; C; 10E4; # GEORGIAN MTAVRULI CAPITAL LETTER PHAR +1CA5; C; 10E5; # GEORGIAN MTAVRULI CAPITAL LETTER KHAR +1CA6; C; 10E6; # GEORGIAN MTAVRULI CAPITAL LETTER GHAN +1CA7; C; 10E7; # GEORGIAN MTAVRULI CAPITAL LETTER QAR +1CA8; C; 10E8; # GEORGIAN MTAVRULI CAPITAL LETTER SHIN +1CA9; C; 10E9; # GEORGIAN MTAVRULI CAPITAL LETTER CHIN +1CAA; C; 10EA; # GEORGIAN MTAVRULI CAPITAL LETTER CAN +1CAB; C; 10EB; # GEORGIAN MTAVRULI CAPITAL LETTER JIL +1CAC; C; 10EC; # GEORGIAN MTAVRULI CAPITAL LETTER CIL +1CAD; C; 10ED; # GEORGIAN MTAVRULI CAPITAL LETTER CHAR +1CAE; C; 10EE; # GEORGIAN MTAVRULI CAPITAL LETTER XAN +1CAF; C; 10EF; # GEORGIAN MTAVRULI CAPITAL LETTER JHAN +1CB0; C; 10F0; # GEORGIAN MTAVRULI CAPITAL LETTER HAE +1CB1; C; 10F1; # GEORGIAN MTAVRULI CAPITAL LETTER HE +1CB2; C; 10F2; # GEORGIAN MTAVRULI CAPITAL LETTER HIE +1CB3; C; 10F3; # GEORGIAN MTAVRULI CAPITAL LETTER WE +1CB4; C; 10F4; # GEORGIAN MTAVRULI CAPITAL LETTER HAR +1CB5; C; 10F5; # GEORGIAN MTAVRULI CAPITAL LETTER HOE +1CB6; C; 10F6; # GEORGIAN MTAVRULI CAPITAL LETTER FI +1CB7; C; 10F7; # GEORGIAN MTAVRULI CAPITAL LETTER YN +1CB8; C; 10F8; # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI +1CB9; C; 10F9; # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN +1CBA; C; 10FA; # GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD; C; 10FD; # GEORGIAN MTAVRULI CAPITAL LETTER AEN +1CBE; C; 10FE; # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN +1CBF; C; 10FF; # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW +1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW +1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW +1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW +1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA +1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON +1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW +1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS +1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA +1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE +1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW +1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW +1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW +1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE +1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW +1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW +1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE +1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW +1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW +1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW +1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE +1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW +1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE +1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE +1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS +1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW +1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW +1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS +1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE +1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING +1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S +1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S +1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE +1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE +1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE +1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP +1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI +1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA +1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA +1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA +1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA +1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA +1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI +1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI +1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA +1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA +1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA +1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA +1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI +1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA +1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA +1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA +1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA +1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA +1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI +1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI +1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA +1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA +1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA +1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA +1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA +1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI +1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI +1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA +1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA +1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA +1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA +1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI +1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI +1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI +1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA +1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA +1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA +1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA +1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA +1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI +1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI +1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY +1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON +1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA +1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA +1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE; C; 03B9; # GREEK PROSGEGRAMMENI +1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI +1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA +1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA +1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA +1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA +1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY +1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON +1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA +1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA +1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI +1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI +1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY +1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON +1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA +1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA +1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA +1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI +1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA +1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA +1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA +1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA +1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126; C; 03C9; # OHM SIGN +212A; C; 006B; # KELVIN SIGN +212B; C; 00E5; # ANGSTROM SIGN +2132; C; 214E; # TURNED CAPITAL F +2160; C; 2170; # ROMAN NUMERAL ONE +2161; C; 2171; # ROMAN NUMERAL TWO +2162; C; 2172; # ROMAN NUMERAL THREE +2163; C; 2173; # ROMAN NUMERAL FOUR +2164; C; 2174; # ROMAN NUMERAL FIVE +2165; C; 2175; # ROMAN NUMERAL SIX +2166; C; 2176; # ROMAN NUMERAL SEVEN +2167; C; 2177; # ROMAN NUMERAL EIGHT +2168; C; 2178; # ROMAN NUMERAL NINE +2169; C; 2179; # ROMAN NUMERAL TEN +216A; C; 217A; # ROMAN NUMERAL ELEVEN +216B; C; 217B; # ROMAN NUMERAL TWELVE +216C; C; 217C; # ROMAN NUMERAL FIFTY +216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED +216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED +216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND +2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED +24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A +24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B +24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C +24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D +24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E +24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F +24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G +24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H +24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I +24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J +24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K +24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L +24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M +24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N +24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O +24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P +24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q +24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R +24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S +24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T +24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U +24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V +24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W +24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X +24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y +24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z +2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU +2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY +2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE +2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI +2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO +2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU +2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE +2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO +2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA +2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE +2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE +2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I +2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI +2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO +2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE +2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE +2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI +2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU +2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI +2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI +2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO +2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO +2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU +2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU +2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU +2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU +2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE +2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA +2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI +2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI +2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA +2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU +2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI +2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI +2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA +2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU +2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS +2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL +2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO +2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS +2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS +2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS +2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA +2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA +2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC +2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A +2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE +2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE +2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL +2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER +2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER +2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA +2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK +2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A +2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA +2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK +2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H +2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL +2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL +2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA +2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA +2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA +2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA +2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE +2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU +2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA +2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE +2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE +2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA +2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA +2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA +2C98; C; 2C99; # COPTIC CAPITAL LETTER MI +2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI +2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI +2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O +2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI +2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO +2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA +2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU +2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA +2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI +2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI +2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI +2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU +2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI +2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI +2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI +2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA +2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA +A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO +A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE +A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA +A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV +A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER +A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU +A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A +A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS +A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN +A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE +A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE +A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL +A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM +A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O +A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O +A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680; C; A681; # CYRILLIC CAPITAL LETTER DWE +A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE +A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE +A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE +A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE +A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE +A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE +A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE +A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE +A694; C; A695; # CYRILLIC CAPITAL LETTER HWE +A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE +A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O +A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O +A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726; C; A727; # LATIN CAPITAL LETTER HENG +A728; C; A729; # LATIN CAPITAL LETTER TZ +A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO +A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO +A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732; C; A733; # LATIN CAPITAL LETTER AA +A734; C; A735; # LATIN CAPITAL LETTER AO +A736; C; A737; # LATIN CAPITAL LETTER AU +A738; C; A739; # LATIN CAPITAL LETTER AV +A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C; C; A73D; # LATIN CAPITAL LETTER AY +A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT +A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE +A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746; C; A747; # LATIN CAPITAL LETTER BROKEN L +A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP +A74E; C; A74F; # LATIN CAPITAL LETTER OO +A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH +A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA +A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA +A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760; C; A761; # LATIN CAPITAL LETTER VY +A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z +A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE +A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768; C; A769; # LATIN CAPITAL LETTER VEND +A76A; C; A76B; # LATIN CAPITAL LETTER ET +A76C; C; A76D; # LATIN CAPITAL LETTER IS +A76E; C; A76F; # LATIN CAPITAL LETTER CON +A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D +A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F +A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G +A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G +A780; C; A781; # LATIN CAPITAL LETTER TURNED L +A782; C; A783; # LATIN CAPITAL LETTER INSULAR R +A784; C; A785; # LATIN CAPITAL LETTER INSULAR S +A786; C; A787; # LATIN CAPITAL LETTER INSULAR T +A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO +A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H +A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER +A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR +A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH +A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE +A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE +A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE +A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE +A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK +A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E +A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G +A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT +A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K +A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T +A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL +A7B3; C; AB53; # LATIN CAPITAL LETTER CHI +A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA +A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA +A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE +A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A +A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I +A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U +A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W +A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK +A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK +A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK +AB70; C; 13A0; # CHEROKEE SMALL LETTER A +AB71; C; 13A1; # CHEROKEE SMALL LETTER E +AB72; C; 13A2; # CHEROKEE SMALL LETTER I +AB73; C; 13A3; # CHEROKEE SMALL LETTER O +AB74; C; 13A4; # CHEROKEE SMALL LETTER U +AB75; C; 13A5; # CHEROKEE SMALL LETTER V +AB76; C; 13A6; # CHEROKEE SMALL LETTER GA +AB77; C; 13A7; # CHEROKEE SMALL LETTER KA +AB78; C; 13A8; # CHEROKEE SMALL LETTER GE +AB79; C; 13A9; # CHEROKEE SMALL LETTER GI +AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO +AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU +AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV +AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA +AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE +AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI +AB80; C; 13B0; # CHEROKEE SMALL LETTER HO +AB81; C; 13B1; # CHEROKEE SMALL LETTER HU +AB82; C; 13B2; # CHEROKEE SMALL LETTER HV +AB83; C; 13B3; # CHEROKEE SMALL LETTER LA +AB84; C; 13B4; # CHEROKEE SMALL LETTER LE +AB85; C; 13B5; # CHEROKEE SMALL LETTER LI +AB86; C; 13B6; # CHEROKEE SMALL LETTER LO +AB87; C; 13B7; # CHEROKEE SMALL LETTER LU +AB88; C; 13B8; # CHEROKEE SMALL LETTER LV +AB89; C; 13B9; # CHEROKEE SMALL LETTER MA +AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME +AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI +AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO +AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU +AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA +AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA +AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH +AB91; C; 13C1; # CHEROKEE SMALL LETTER NE +AB92; C; 13C2; # CHEROKEE SMALL LETTER NI +AB93; C; 13C3; # CHEROKEE SMALL LETTER NO +AB94; C; 13C4; # CHEROKEE SMALL LETTER NU +AB95; C; 13C5; # CHEROKEE SMALL LETTER NV +AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA +AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE +AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI +AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO +AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU +AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV +AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA +AB9D; C; 13CD; # CHEROKEE SMALL LETTER S +AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE +AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI +ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO +ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU +ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV +ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA +ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA +ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE +ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE +ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI +ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI +ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO +ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU +ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV +ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA +ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA +ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE +ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI +ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO +ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU +ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV +ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA +ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE +ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI +ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO +ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU +ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV +ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA +ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE +ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI +ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO +ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU +ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV +ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA +FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF +FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI +FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL +FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI +FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL +FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T +FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST +FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW +FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH +FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI +FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW +FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH +FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A +FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B +FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C +FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D +FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E +FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F +FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G +FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H +FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I +FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J +FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K +FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L +FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M +FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N +FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O +FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P +FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q +FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R +FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S +FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T +FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U +FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V +FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W +FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X +FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y +FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z +10400; C; 10428; # DESERET CAPITAL LETTER LONG I +10401; C; 10429; # DESERET CAPITAL LETTER LONG E +10402; C; 1042A; # DESERET CAPITAL LETTER LONG A +10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH +10404; C; 1042C; # DESERET CAPITAL LETTER LONG O +10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO +10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I +10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E +10408; C; 10430; # DESERET CAPITAL LETTER SHORT A +10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH +1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O +1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO +1040C; C; 10434; # DESERET CAPITAL LETTER AY +1040D; C; 10435; # DESERET CAPITAL LETTER OW +1040E; C; 10436; # DESERET CAPITAL LETTER WU +1040F; C; 10437; # DESERET CAPITAL LETTER YEE +10410; C; 10438; # DESERET CAPITAL LETTER H +10411; C; 10439; # DESERET CAPITAL LETTER PEE +10412; C; 1043A; # DESERET CAPITAL LETTER BEE +10413; C; 1043B; # DESERET CAPITAL LETTER TEE +10414; C; 1043C; # DESERET CAPITAL LETTER DEE +10415; C; 1043D; # DESERET CAPITAL LETTER CHEE +10416; C; 1043E; # DESERET CAPITAL LETTER JEE +10417; C; 1043F; # DESERET CAPITAL LETTER KAY +10418; C; 10440; # DESERET CAPITAL LETTER GAY +10419; C; 10441; # DESERET CAPITAL LETTER EF +1041A; C; 10442; # DESERET CAPITAL LETTER VEE +1041B; C; 10443; # DESERET CAPITAL LETTER ETH +1041C; C; 10444; # DESERET CAPITAL LETTER THEE +1041D; C; 10445; # DESERET CAPITAL LETTER ES +1041E; C; 10446; # DESERET CAPITAL LETTER ZEE +1041F; C; 10447; # DESERET CAPITAL LETTER ESH +10420; C; 10448; # DESERET CAPITAL LETTER ZHEE +10421; C; 10449; # DESERET CAPITAL LETTER ER +10422; C; 1044A; # DESERET CAPITAL LETTER EL +10423; C; 1044B; # DESERET CAPITAL LETTER EM +10424; C; 1044C; # DESERET CAPITAL LETTER EN +10425; C; 1044D; # DESERET CAPITAL LETTER ENG +10426; C; 1044E; # DESERET CAPITAL LETTER OI +10427; C; 1044F; # DESERET CAPITAL LETTER EW +104B0; C; 104D8; # OSAGE CAPITAL LETTER A +104B1; C; 104D9; # OSAGE CAPITAL LETTER AI +104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN +104B3; C; 104DB; # OSAGE CAPITAL LETTER AH +104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA +104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA +104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA +104B7; C; 104DF; # OSAGE CAPITAL LETTER E +104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN +104B9; C; 104E1; # OSAGE CAPITAL LETTER HA +104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA +104BB; C; 104E3; # OSAGE CAPITAL LETTER I +104BC; C; 104E4; # OSAGE CAPITAL LETTER KA +104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA +104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA +104BF; C; 104E7; # OSAGE CAPITAL LETTER LA +104C0; C; 104E8; # OSAGE CAPITAL LETTER MA +104C1; C; 104E9; # OSAGE CAPITAL LETTER NA +104C2; C; 104EA; # OSAGE CAPITAL LETTER O +104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN +104C4; C; 104EC; # OSAGE CAPITAL LETTER PA +104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA +104C6; C; 104EE; # OSAGE CAPITAL LETTER SA +104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA +104C8; C; 104F0; # OSAGE CAPITAL LETTER TA +104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA +104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA +104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA +104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA +104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA +104CE; C; 104F6; # OSAGE CAPITAL LETTER U +104CF; C; 104F7; # OSAGE CAPITAL LETTER WA +104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA +104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA +104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA +104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA +10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A +10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA +10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB +10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB +10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC +10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC +10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS +10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED +10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND +10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E +10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E +10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE +10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF +10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG +10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY +10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH +10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I +10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II +10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ +10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK +10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK +10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK +10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL +10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY +10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM +10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN +10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY +10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O +10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO +10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE +10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE +10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE +10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP +10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP +10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER +10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER +10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES +10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ +10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET +10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT +10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY +10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH +10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U +10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU +10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE +10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE +10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV +10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ +10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS +10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN +10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US +118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA +118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A +118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI +118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU +118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA +118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO +118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II +118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU +118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E +118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O +118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG +118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA +118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO +118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY +118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ +118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC +118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN +118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD +118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE +118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG +118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA +118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT +118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM +118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU +118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU +118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO +118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO +118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR +118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR +118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU +118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII +118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO +16E40; C; 16E60; # MEDEFAIDRIN CAPITAL LETTER M +16E41; C; 16E61; # MEDEFAIDRIN CAPITAL LETTER S +16E42; C; 16E62; # MEDEFAIDRIN CAPITAL LETTER V +16E43; C; 16E63; # MEDEFAIDRIN CAPITAL LETTER W +16E44; C; 16E64; # MEDEFAIDRIN CAPITAL LETTER ATIU +16E45; C; 16E65; # MEDEFAIDRIN CAPITAL LETTER Z +16E46; C; 16E66; # MEDEFAIDRIN CAPITAL LETTER KP +16E47; C; 16E67; # MEDEFAIDRIN CAPITAL LETTER P +16E48; C; 16E68; # MEDEFAIDRIN CAPITAL LETTER T +16E49; C; 16E69; # MEDEFAIDRIN CAPITAL LETTER G +16E4A; C; 16E6A; # MEDEFAIDRIN CAPITAL LETTER F +16E4B; C; 16E6B; # MEDEFAIDRIN CAPITAL LETTER I +16E4C; C; 16E6C; # MEDEFAIDRIN CAPITAL LETTER K +16E4D; C; 16E6D; # MEDEFAIDRIN CAPITAL LETTER A +16E4E; C; 16E6E; # MEDEFAIDRIN CAPITAL LETTER J +16E4F; C; 16E6F; # MEDEFAIDRIN CAPITAL LETTER E +16E50; C; 16E70; # MEDEFAIDRIN CAPITAL LETTER B +16E51; C; 16E71; # MEDEFAIDRIN CAPITAL LETTER C +16E52; C; 16E72; # MEDEFAIDRIN CAPITAL LETTER U +16E53; C; 16E73; # MEDEFAIDRIN CAPITAL LETTER YU +16E54; C; 16E74; # MEDEFAIDRIN CAPITAL LETTER L +16E55; C; 16E75; # MEDEFAIDRIN CAPITAL LETTER Q +16E56; C; 16E76; # MEDEFAIDRIN CAPITAL LETTER HP +16E57; C; 16E77; # MEDEFAIDRIN CAPITAL LETTER NY +16E58; C; 16E78; # MEDEFAIDRIN CAPITAL LETTER X +16E59; C; 16E79; # MEDEFAIDRIN CAPITAL LETTER D +16E5A; C; 16E7A; # MEDEFAIDRIN CAPITAL LETTER OE +16E5B; C; 16E7B; # MEDEFAIDRIN CAPITAL LETTER N +16E5C; C; 16E7C; # MEDEFAIDRIN CAPITAL LETTER R +16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O +16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI +16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y +1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF +1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI +1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM +1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM +1E904; C; 1E926; # ADLAM CAPITAL LETTER BA +1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE +1E906; C; 1E928; # ADLAM CAPITAL LETTER PE +1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE +1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA +1E909; C; 1E92B; # ADLAM CAPITAL LETTER E +1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA +1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I +1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O +1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA +1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE +1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW +1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN +1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF +1E912; C; 1E934; # ADLAM CAPITAL LETTER YA +1E913; C; 1E935; # ADLAM CAPITAL LETTER U +1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM +1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI +1E916; C; 1E938; # ADLAM CAPITAL LETTER HA +1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF +1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA +1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA +1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU +1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA +1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA +1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA +1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE +1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL +1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO +1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA +# +# EOF diff --git a/src/libraries/Common/tests/Data/PropList-12.1.0.txt b/src/libraries/Common/tests/Data/PropList-12.1.0.txt new file mode 100644 index 0000000..4394602 --- /dev/null +++ b/src/libraries/Common/tests/Data/PropList-12.1.0.txt @@ -0,0 +1,1666 @@ +# PropList-12.1.0.txt +# Date: 2019-03-10, 10:53:16 GMT +# © 2019 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +0009..000D ; White_Space # Cc [5] .. +0020 ; White_Space # Zs SPACE +0085 ; White_Space # Cc +00A0 ; White_Space # Zs NO-BREAK SPACE +1680 ; White_Space # Zs OGHAM SPACE MARK +2000..200A ; White_Space # Zs [11] EN QUAD..HAIR SPACE +2028 ; White_Space # Zl LINE SEPARATOR +2029 ; White_Space # Zp PARAGRAPH SEPARATOR +202F ; White_Space # Zs NARROW NO-BREAK SPACE +205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE +3000 ; White_Space # Zs IDEOGRAPHIC SPACE + +# Total code points: 25 + +# ================================================ + +061C ; Bidi_Control # Cf ARABIC LETTER MARK +200E..200F ; Bidi_Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +202A..202E ; Bidi_Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2066..2069 ; Bidi_Control # Cf [4] LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE + +# Total code points: 12 + +# ================================================ + +200C..200D ; Join_Control # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER + +# Total code points: 2 + +# ================================================ + +002D ; Dash # Pd HYPHEN-MINUS +058A ; Dash # Pd ARMENIAN HYPHEN +05BE ; Dash # Pd HEBREW PUNCTUATION MAQAF +1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN +1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR +2053 ; Dash # Po SWUNG DASH +207B ; Dash # Sm SUPERSCRIPT MINUS +208B ; Dash # Sm SUBSCRIPT MINUS +2212 ; Dash # Sm MINUS SIGN +2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN +2E1A ; Dash # Pd HYPHEN WITH DIAERESIS +2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH +2E40 ; Dash # Pd DOUBLE HYPHEN +301C ; Dash # Pd WAVE DASH +3030 ; Dash # Pd WAVY DASH +30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE58 ; Dash # Pd SMALL EM DASH +FE63 ; Dash # Pd SMALL HYPHEN-MINUS +FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS + +# Total code points: 28 + +# ================================================ + +002D ; Hyphen # Pd HYPHEN-MINUS +00AD ; Hyphen # Cf SOFT HYPHEN +058A ; Hyphen # Pd ARMENIAN HYPHEN +1806 ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2011 ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN +2E17 ; Hyphen # Pd DOUBLE OBLIQUE HYPHEN +30FB ; Hyphen # Po KATAKANA MIDDLE DOT +FE63 ; Hyphen # Pd SMALL HYPHEN-MINUS +FF0D ; Hyphen # Pd FULLWIDTH HYPHEN-MINUS +FF65 ; Hyphen # Po HALFWIDTH KATAKANA MIDDLE DOT + +# Total code points: 11 + +# ================================================ + +0022 ; Quotation_Mark # Po QUOTATION MARK +0027 ; Quotation_Mark # Po APOSTROPHE +00AB ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2018 ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK +2019 ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK +201A ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Quotation_Mark # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Quotation_Mark # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2039 ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2E42 ; Quotation_Mark # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +300C ; Quotation_Mark # Ps LEFT CORNER BRACKET +300D ; Quotation_Mark # Pe RIGHT CORNER BRACKET +300E ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET +300F ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET +301D ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Quotation_Mark # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +FE41 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FF02 ; Quotation_Mark # Po FULLWIDTH QUOTATION MARK +FF07 ; Quotation_Mark # Po FULLWIDTH APOSTROPHE +FF62 ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET + +# Total code points: 30 + +# ================================================ + +0021 ; Terminal_Punctuation # Po EXCLAMATION MARK +002C ; Terminal_Punctuation # Po COMMA +002E ; Terminal_Punctuation # Po FULL STOP +003A..003B ; Terminal_Punctuation # Po [2] COLON..SEMICOLON +003F ; Terminal_Punctuation # Po QUESTION MARK +037E ; Terminal_Punctuation # Po GREEK QUESTION MARK +0387 ; Terminal_Punctuation # Po GREEK ANO TELEIA +0589 ; Terminal_Punctuation # Po ARMENIAN FULL STOP +05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ +060C ; Terminal_Punctuation # Po ARABIC COMMA +061B ; Terminal_Punctuation # Po ARABIC SEMICOLON +061E..061F ; Terminal_Punctuation # Po [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK +06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP +0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION +070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS +07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK +0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +085E ; Terminal_Punctuation # Po MANDAIC PUNCTUATION +0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD +0F0D..0F12 ; Terminal_Punctuation # Po [6] TIBETAN MARK SHAD..TIBETAN MARK RGYA GRAM SHAD +104A..104B ; Terminal_Punctuation # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1361..1368 ; Terminal_Punctuation # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +166E ; Terminal_Punctuation # Po CANADIAN SYLLABICS FULL STOP +16EB..16ED ; Terminal_Punctuation # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +1735..1736 ; Terminal_Punctuation # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +17D4..17D6 ; Terminal_Punctuation # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17DA ; Terminal_Punctuation # Po KHMER SIGN KOOMUUT +1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS +1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP +1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN +1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK +2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP +2E41 ; Terminal_Punctuation # Po REVERSED COMMA +2E4C ; Terminal_Punctuation # Po MEDIEVAL COMMA +2E4E..2E4F ; Terminal_Punctuation # Po [2] PUNCTUS ELEVATUS MARK..CORNISH VERSE DIVIDER +3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP +A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK +A6F3..A6F7 ; Terminal_Punctuation # Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK +A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA +A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI +AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI +FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK +FF0C ; Terminal_Punctuation # Po FULLWIDTH COMMA +FF0E ; Terminal_Punctuation # Po FULLWIDTH FULL STOP +FF1A..FF1B ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1F ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK +FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA +1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER +103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER +10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN +1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR +10A56..10A57 ; Terminal_Punctuation # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +10AF0..10AF5 ; Terminal_Punctuation # Po [6] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION TWO DOTS +10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10F55..10F59 ; Terminal_Punctuation # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Terminal_Punctuation # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; Terminal_Punctuation # Po SHARADA SUTRA MARK +111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK +112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK +1144B..1144D ; Terminal_Punctuation # Po [3] NEWA DANDA..NEWA COMMA +1145B ; Terminal_Punctuation # Po NEWA PLACEHOLDER MARK +115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR +115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11A42..11A43 ; Terminal_Punctuation # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; Terminal_Punctuation # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11AA1..11AA2 ; Terminal_Punctuation # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2 +11C41..11C43 ; Terminal_Punctuation # Po [3] BHAIKSUKI DANDA..BHAIKSUKI WORD SEPARATOR +11C71 ; Terminal_Punctuation # Po MARCHEN MARK SHAD +11EF7..11EF8 ; Terminal_Punctuation # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +16A6E..16A6F ; Terminal_Punctuation # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP +16B37..16B39 ; Terminal_Punctuation # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM +16B44 ; Terminal_Punctuation # Po PAHAWH HMONG SIGN XAUS +16E97..16E98 ; Terminal_Punctuation # Po [2] MEDEFAIDRIN COMMA..MEDEFAIDRIN FULL STOP +1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON + +# Total code points: 264 + +# ================================================ + +005E ; Other_Math # Sk CIRCUMFLEX ACCENT +03D0..03D2 ; Other_Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL +03D5 ; Other_Math # L& GREEK PHI SYMBOL +03F0..03F1 ; Other_Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Other_Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +2016 ; Other_Math # Po DOUBLE VERTICAL LINE +2032..2034 ; Other_Math # Po [3] PRIME..TRIPLE PRIME +2040 ; Other_Math # Pc CHARACTER TIE +2061..2064 ; Other_Math # Cf [4] FUNCTION APPLICATION..INVISIBLE PLUS +207D ; Other_Math # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Other_Math # Pe SUPERSCRIPT RIGHT PARENTHESIS +208D ; Other_Math # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Other_Math # Pe SUBSCRIPT RIGHT PARENTHESIS +20D0..20DC ; Other_Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; Other_Math # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20E6 ; Other_Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY +20EB..20EF ; Other_Math # Mn [5] COMBINING LONG DOUBLE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW +2102 ; Other_Math # L& DOUBLE-STRUCK CAPITAL C +2107 ; Other_Math # L& EULER CONSTANT +210A..2113 ; Other_Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Other_Math # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Other_Math # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Other_Math # L& DOUBLE-STRUCK CAPITAL Z +2128 ; Other_Math # L& BLACK-LETTER CAPITAL Z +2129 ; Other_Math # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Other_Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212F..2131 ; Other_Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Other_Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Other_Math # Lo [4] ALEF SYMBOL..DALET SYMBOL +213C..213F ; Other_Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Other_Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +2195..2199 ; Other_Math # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219C..219F ; Other_Math # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A1..21A2 ; Other_Math # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A4..21A5 ; Other_Math # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A7 ; Other_Math # So DOWNWARDS ARROW FROM BAR +21A9..21AD ; Other_Math # So [5] LEFTWARDS ARROW WITH HOOK..LEFT RIGHT WAVE ARROW +21B0..21B1 ; Other_Math # So [2] UPWARDS ARROW WITH TIP LEFTWARDS..UPWARDS ARROW WITH TIP RIGHTWARDS +21B6..21B7 ; Other_Math # So [2] ANTICLOCKWISE TOP SEMICIRCLE ARROW..CLOCKWISE TOP SEMICIRCLE ARROW +21BC..21CD ; Other_Math # So [18] LEFTWARDS HARPOON WITH BARB UPWARDS..LEFTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Other_Math # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D3 ; Other_Math # So DOWNWARDS DOUBLE ARROW +21D5..21DB ; Other_Math # So [7] UP DOWN DOUBLE ARROW..RIGHTWARDS TRIPLE ARROW +21DD ; Other_Math # So RIGHTWARDS SQUIGGLE ARROW +21E4..21E5 ; Other_Math # So [2] LEFTWARDS ARROW TO BAR..RIGHTWARDS ARROW TO BAR +2308 ; Other_Math # Ps LEFT CEILING +2309 ; Other_Math # Pe RIGHT CEILING +230A ; Other_Math # Ps LEFT FLOOR +230B ; Other_Math # Pe RIGHT FLOOR +23B4..23B5 ; Other_Math # So [2] TOP SQUARE BRACKET..BOTTOM SQUARE BRACKET +23B7 ; Other_Math # So RADICAL SYMBOL BOTTOM +23D0 ; Other_Math # So VERTICAL LINE EXTENSION +23E2 ; Other_Math # So WHITE TRAPEZIUM +25A0..25A1 ; Other_Math # So [2] BLACK SQUARE..WHITE SQUARE +25AE..25B6 ; Other_Math # So [9] BLACK VERTICAL RECTANGLE..BLACK RIGHT-POINTING TRIANGLE +25BC..25C0 ; Other_Math # So [5] BLACK DOWN-POINTING TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C6..25C7 ; Other_Math # So [2] BLACK DIAMOND..WHITE DIAMOND +25CA..25CB ; Other_Math # So [2] LOZENGE..WHITE CIRCLE +25CF..25D3 ; Other_Math # So [5] BLACK CIRCLE..CIRCLE WITH UPPER HALF BLACK +25E2 ; Other_Math # So BLACK LOWER RIGHT TRIANGLE +25E4 ; Other_Math # So BLACK UPPER LEFT TRIANGLE +25E7..25EC ; Other_Math # So [6] SQUARE WITH LEFT HALF BLACK..WHITE UP-POINTING TRIANGLE WITH DOT +2605..2606 ; Other_Math # So [2] BLACK STAR..WHITE STAR +2640 ; Other_Math # So FEMALE SIGN +2642 ; Other_Math # So MALE SIGN +2660..2663 ; Other_Math # So [4] BLACK SPADE SUIT..BLACK CLUB SUIT +266D..266E ; Other_Math # So [2] MUSIC FLAT SIGN..MUSIC NATURAL SIGN +27C5 ; Other_Math # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Other_Math # Pe RIGHT S-SHAPED BAG DELIMITER +27E6 ; Other_Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Other_Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Other_Math # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Other_Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Other_Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Other_Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Other_Math # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Other_Math # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Other_Math # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Other_Math # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +2983 ; Other_Math # Ps LEFT WHITE CURLY BRACKET +2984 ; Other_Math # Pe RIGHT WHITE CURLY BRACKET +2985 ; Other_Math # Ps LEFT WHITE PARENTHESIS +2986 ; Other_Math # Pe RIGHT WHITE PARENTHESIS +2987 ; Other_Math # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Other_Math # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Other_Math # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Other_Math # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Other_Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Other_Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Other_Math # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Other_Math # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Other_Math # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Other_Math # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Other_Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Other_Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Other_Math # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Other_Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET +29D8 ; Other_Math # Ps LEFT WIGGLY FENCE +29D9 ; Other_Math # Pe RIGHT WIGGLY FENCE +29DA ; Other_Math # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Other_Math # Pe RIGHT DOUBLE WIGGLY FENCE +29FC ; Other_Math # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Other_Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET +FE61 ; Other_Math # Po SMALL ASTERISK +FE63 ; Other_Math # Pd SMALL HYPHEN-MINUS +FE68 ; Other_Math # Po SMALL REVERSE SOLIDUS +FF3C ; Other_Math # Po FULLWIDTH REVERSE SOLIDUS +FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT +1D400..1D454 ; Other_Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Other_Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Other_Math # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Other_Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Other_Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Other_Math # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Other_Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Other_Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Other_Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Other_Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Other_Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Other_Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Other_Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Other_Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Other_Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Other_Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Other_Math # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Other_Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Other_Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Other_Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Other_Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Other_Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Other_Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Other_Math # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Other_Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; Other_Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Other_Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Other_Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Other_Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Other_Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Other_Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Other_Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Other_Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Other_Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Other_Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Other_Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Other_Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Other_Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Other_Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Other_Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total code points: 1362 + +# ================================================ + +0030..0039 ; Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F +FF10..FF19 ; Hex_Digit # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF26 ; Hex_Digit # L& [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F +FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F + +# Total code points: 44 + +# ================================================ + +0030..0039 ; ASCII_Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; ASCII_Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; ASCII_Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F + +# Total code points: 22 + +# ================================================ + +0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG +05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE +05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Other_Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Other_Alphabetic # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Other_Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..0657 ; Other_Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA +0659..065F ; Other_Alphabetic # Mn [7] ARABIC ZWARAKAY..ARABIC WAVY HAMZA BELOW +0670 ; Other_Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Other_Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06E1..06E4 ; Other_Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Other_Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06ED ; Other_Alphabetic # Mn ARABIC SMALL LOW MEEM +0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA +07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN +0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF +081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA +08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN +08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA +0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA +093A ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN OE +093B ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN OOE +093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Other_Alphabetic # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0955..0957 ; Other_Alphabetic # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BE..09C0 ; Other_Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Other_Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09D7 ; Other_Alphabetic # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Other_Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +0A01..0A02 ; Other_Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Other_Alphabetic # Mc GURMUKHI SIGN VISARGA +0A3E..0A40 ; Other_Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4C ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU +0A51 ; Other_Alphabetic # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Other_Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Other_Alphabetic # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Other_Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Other_Alphabetic # Mc GUJARATI SIGN VISARGA +0ABE..0AC0 ; Other_Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Other_Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFC ; Other_Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH +0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA +0B3F ; Other_Alphabetic # Mn ORIYA VOWEL SIGN I +0B40 ; Other_Alphabetic # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Other_Alphabetic # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B56 ; Other_Alphabetic # Mn ORIYA AI LENGTH MARK +0B57 ; Other_Alphabetic # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Other_Alphabetic # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Other_Alphabetic # Mn TAMIL SIGN ANUSVARA +0BBE..0BBF ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Other_Alphabetic # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD7 ; Other_Alphabetic # Mc TAMIL AU LENGTH MARK +0C00 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; Other_Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C3E..0C40 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Other_Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4C ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU +0C55..0C56 ; Other_Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Other_Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Other_Alphabetic # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; Other_Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; Other_Alphabetic # Mc KANNADA VOWEL SIGN AA +0CBF ; Other_Alphabetic # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Other_Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Other_Alphabetic # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU +0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; Other_Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D57 ; Other_Alphabetic # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Other_Alphabetic # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D82..0D83 ; Other_Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DCF..0DD1 ; Other_Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Other_Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Other_Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Other_Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Other_Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E31 ; Other_Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Other_Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E4D ; Other_Alphabetic # Mn THAI CHARACTER NIKHAHIT +0EB1 ; Other_Alphabetic # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EB9 ; Other_Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Other_Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0ECD ; Other_Alphabetic # Mn LAO NIGGAHITA +0F71..0F7E ; Other_Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Other_Alphabetic # Mc TIBETAN SIGN RNAM BCAD +0F80..0F81 ; Other_Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II +0F8D..0F97 ; Other_Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Other_Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +102B..102C ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN E +1032..1036 ; Other_Alphabetic # Mn [5] MYANMAR VOWEL SIGN AI..MYANMAR SIGN ANUSVARA +1038 ; Other_Alphabetic # Mc MYANMAR SIGN VISARGA +103B..103C ; Other_Alphabetic # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Other_Alphabetic # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1056..1057 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Other_Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1062..1064 ; Other_Alphabetic # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1067..106D ; Other_Alphabetic # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +1071..1074 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; Other_Alphabetic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Other_Alphabetic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108F ; Other_Alphabetic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109C ; Other_Alphabetic # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI +1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U +1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Other_Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Other_Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B6 ; Other_Alphabetic # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Other_Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +1885..1886 ; Other_Alphabetic # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Other_Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Other_Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Other_Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Other_Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Other_Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; Other_Alphabetic # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; Other_Alphabetic # Mn BUGINESE VOWEL SIGN AE +1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A +1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG +1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH +1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Other_Alphabetic # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Other_Alphabetic # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Other_Alphabetic # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Other_Alphabetic # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Other_Alphabetic # Mn BALINESE VOWEL SIGN PEPET +1B43 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN PEPET TEDUNG +1B80..1B81 ; Other_Alphabetic # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Other_Alphabetic # Mc SUNDANESE SIGN PANGWISAD +1BA1 ; Other_Alphabetic # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Other_Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Other_Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Other_Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Other_Alphabetic # Mn [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE7 ; Other_Alphabetic # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; Other_Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; Other_Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; Other_Alphabetic # Mn BATAK VOWEL SIGN KARO O +1BEE ; Other_Alphabetic # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; Other_Alphabetic # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36 ; Other_Alphabetic # Mn LEPCHA SIGN RAN +1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A69E..A69F ; Other_Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A802 ; Other_Alphabetic # Mn SYLOTI NAGRI SIGN DVISVARA +A80B ; Other_Alphabetic # Mn SYLOTI NAGRI SIGN ANUSVARA +A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO +A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C5 ; Other_Alphabetic # Mn SAURASHTRA SIGN CANDRABINDU +A8FF ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN AY +A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O +A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H +A980..A982 ; Other_Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN +A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; Other_Alphabetic # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9BF ; Other_Alphabetic # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA +A9E5 ; Other_Alphabetic # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Other_Alphabetic # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H +AA7B ; Other_Alphabetic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Other_Alphabetic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Other_Alphabetic # Mc MYANMAR SIGN TAI LAING TONE-5 +AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM +AAEB ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Other_Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA +ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +10376..1037A ; Other_Alphabetic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU +11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA +11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA +11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU +11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA +110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Other_Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11145..11146 ; Other_Alphabetic # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11180..11181 ; Other_Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Other_Alphabetic # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Other_Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Other_Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Other_Alphabetic # Mc SHARADA VOWEL SIGN AU +1122C..1122E ; Other_Alphabetic # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; Other_Alphabetic # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; Other_Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA +11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA +1123E ; Other_Alphabetic # Mn KHOJKI SIGN SUKUN +112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU +11300..11301 ; Other_Alphabetic # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; Other_Alphabetic # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133E..1133F ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; Other_Alphabetic # Mn GRANTHA VOWEL SIGN II +11341..11344 ; Other_Alphabetic # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK +11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11435..11437 ; Other_Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Other_Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Other_Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11443..11444 ; Other_Alphabetic # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA +11445 ; Other_Alphabetic # Mc NEWA SIGN VISARGA +114B0..114B2 ; Other_Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; Other_Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; Other_Alphabetic # Mc TIRHUTA VOWEL SIGN E +114BA ; Other_Alphabetic # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; Other_Alphabetic # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; Other_Alphabetic # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; Other_Alphabetic # Mc TIRHUTA SIGN VISARGA +115AF..115B1 ; Other_Alphabetic # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; Other_Alphabetic # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; Other_Alphabetic # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; Other_Alphabetic # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; Other_Alphabetic # Mc SIDDHAM SIGN VISARGA +115DC..115DD ; Other_Alphabetic # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11630..11632 ; Other_Alphabetic # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; Other_Alphabetic # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; Other_Alphabetic # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; Other_Alphabetic # Mn MODI SIGN ANUSVARA +1163E ; Other_Alphabetic # Mc MODI SIGN VISARGA +11640 ; Other_Alphabetic # Mn MODI SIGN ARDHACANDRA +116AB ; Other_Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Other_Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +1171D..1171F ; Other_Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E +11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM +1182C..1182E ; Other_Alphabetic # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; Other_Alphabetic # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; Other_Alphabetic # Mc DOGRA SIGN VISARGA +119D1..119D3 ; Other_Alphabetic # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; Other_Alphabetic # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Other_Alphabetic # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; Other_Alphabetic # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E4 ; Other_Alphabetic # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A01..11A0A ; Other_Alphabetic # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A35..11A38 ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Other_Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3B..11A3E ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A51..11A56 ; Other_Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Other_Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA +11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; Other_Alphabetic # Mc BHAIKSUKI SIGN VISARGA +11C92..11CA7 ; Other_Alphabetic # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Other_Alphabetic # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Other_Alphabetic # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Other_Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Other_Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Other_Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Other_Alphabetic # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Other_Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D41 ; Other_Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA +11D43 ; Other_Alphabetic # Mn MASARAM GONDI SIGN CANDRA +11D47 ; Other_Alphabetic # Mn MASARAM GONDI RA-KARA +11D8A..11D8E ; Other_Alphabetic # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; Other_Alphabetic # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; Other_Alphabetic # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; Other_Alphabetic # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; Other_Alphabetic # Mc GUNJALA GONDI SIGN VISARGA +11EF3..11EF4 ; Other_Alphabetic # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; Other_Alphabetic # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +16F4F ; Other_Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F51..16F87 ; Other_Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; Other_Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK +1E000..1E006 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Other_Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Other_Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Other_Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E947 ; Other_Alphabetic # Mn ADLAM HAMZA +1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 1377 + +# ================================================ + +3006 ; Ideographic # Lo IDEOGRAPHIC CLOSING MARK +3007 ; Ideographic # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FEF ; Ideographic # Lo [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF +F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18AF2 ; Ideographic # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755 +1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D + +# Total code points: 96190 + +# ================================================ + +005E ; Diacritic # Sk CIRCUMFLEX ACCENT +0060 ; Diacritic # Sk GRAVE ACCENT +00A8 ; Diacritic # Sk DIAERESIS +00AF ; Diacritic # Sk MACRON +00B4 ; Diacritic # Sk ACUTE ACCENT +00B7 ; Diacritic # Po MIDDLE DOT +00B8 ; Diacritic # Sk CEDILLA +02B0..02C1 ; Diacritic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Diacritic # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Diacritic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Diacritic # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Diacritic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; Diacritic # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Diacritic # Lm MODIFIER LETTER VOICING +02ED ; Diacritic # Sk MODIFIER LETTER UNASPIRATED +02EE ; Diacritic # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Diacritic # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..034E ; Diacritic # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW +0350..0357 ; Diacritic # Mn [8] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING RIGHT HALF RING ABOVE +035D..0362 ; Diacritic # Mn [6] COMBINING DOUBLE BREVE..COMBINING DOUBLE RIGHTWARDS ARROW BELOW +0374 ; Diacritic # Lm GREEK NUMERAL SIGN +0375 ; Diacritic # Sk GREEK LOWER NUMERAL SIGN +037A ; Diacritic # Lm GREEK YPOGEGRAMMENI +0384..0385 ; Diacritic # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0483..0487 ; Diacritic # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0559 ; Diacritic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0591..05A1 ; Diacritic # Mn [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER +05A3..05BD ; Diacritic # Mn [27] HEBREW ACCENT MUNAH..HEBREW POINT METEG +05BF ; Diacritic # Mn HEBREW POINT RAFE +05C1..05C2 ; Diacritic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4 ; Diacritic # Mn HEBREW MARK UPPER DOT +064B..0652 ; Diacritic # Mn [8] ARABIC FATHATAN..ARABIC SUKUN +0657..0658 ; Diacritic # Mn [2] ARABIC INVERTED DAMMA..ARABIC MARK NOON GHUNNA +06DF..06E0 ; Diacritic # Mn [2] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO +06E5..06E6 ; Diacritic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EA..06EC ; Diacritic # Mn [3] ARABIC EMPTY CENTRE LOW STOP..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE +0730..074A ; Diacritic # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH +08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT +093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA +094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA +0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT +0971 ; Diacritic # Lm DEVANAGARI SIGN HIGH SPACING DOT +09BC ; Diacritic # Mn BENGALI SIGN NUKTA +09CD ; Diacritic # Mn BENGALI SIGN VIRAMA +0A3C ; Diacritic # Mn GURMUKHI SIGN NUKTA +0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA +0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA +0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA +0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B3C ; Diacritic # Mn ORIYA SIGN NUKTA +0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA +0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA +0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA +0CBC ; Diacritic # Mn KANNADA SIGN NUKTA +0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA +0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA +0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA +0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT +0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN +0EBA ; Diacritic # Mn LAO SIGN PALI VIRAMA +0EC8..0ECC ; Diacritic # Mn [5] LAO TONE MAI EK..LAO CANCELLATION MARK +0F18..0F19 ; Diacritic # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Diacritic # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; Diacritic # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F82..0F84 ; Diacritic # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA +0F86..0F87 ; Diacritic # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0FC6 ; Diacritic # Mn TIBETAN SYMBOL PADMA GDAN +1037 ; Diacritic # Mn MYANMAR SIGN DOT BELOW +1039..103A ; Diacritic # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +1063..1064 ; Diacritic # Mc [2] MYANMAR TONE MARK SGAW KAREN HATHI..MYANMAR TONE MARK SGAW KAREN KE PHO +1069..106D ; Diacritic # Mc [5] MYANMAR SIGN WESTERN PWO KAREN TONE-1..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3 +135D..135F ; Diacritic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Diacritic # Mn KHMER SIGN ATTHACAN +1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1B34 ; Diacritic # Mn BALINESE SIGN REREKAN +1B44 ; Diacritic # Mc BALINESE ADEG ADEG +1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA +1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA +1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Diacritic # Mn VEDIC SIGN TIRYAK +1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE +1CF7 ; Diacritic # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW +1DF5..1DF9 ; Diacritic # Mn [5] COMBINING UP TACK ABOVE..COMBINING WIDE INVERTED BRIDGE BELOW +1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1FBD ; Diacritic # Sk GREEK KORONIS +1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA +2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2E2F ; Diacritic # Lm VERTICAL TILDE +302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Diacritic # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET +A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A67F ; Diacritic # Lm CYRILLIC PAYEROK +A69C..A69D ; Diacritic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A700..A716 ; Diacritic # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; Diacritic # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA +A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU +A92E ; Diacritic # Po KAYAH LI SIGN CWI +A953 ; Diacritic # Mc REJANG VIRAMA +A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU +A9C0 ; Diacritic # Mc JAVANESE PANGKON +A9E5 ; Diacritic # Mn MYANMAR SIGN SHAN SAW +AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Diacritic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Diacritic # Mc MYANMAR SIGN TAI LAING TONE-5 +AABF ; Diacritic # Mn TAI VIET TONE MAI EK +AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG +AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO +AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG +AAF6 ; Diacritic # Mn MEETEI MAYEK VIRAMA +AB5B ; Diacritic # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; Diacritic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK +ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK +FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE20..FE2F ; Diacritic # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT +FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE3 ; Diacritic # Sk FULLWIDTH MACRON +102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK +10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D22..10D23 ; Diacritic # Lo [2] HANIFI ROHINGYA MARK SAKIN..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA +111C0 ; Diacritic # Mc SHARADA SIGN VIRAMA +111CA..111CC ; Diacritic # Mn [3] SHARADA SIGN NUKTA..SHARADA EXTRA SHORT VOWEL MARK +11235 ; Diacritic # Mc KHOJKI SIGN VIRAMA +11236 ; Diacritic # Mn KHOJKI SIGN NUKTA +112E9..112EA ; Diacritic # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA +1133C ; Diacritic # Mn GRANTHA SIGN NUKTA +1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA +11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11442 ; Diacritic # Mn NEWA SIGN VIRAMA +11446 ; Diacritic # Mn NEWA SIGN NUKTA +114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115BF..115C0 ; Diacritic # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +1163F ; Diacritic # Mn MODI SIGN VIRAMA +116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA +116B7 ; Diacritic # Mn TAKRI SIGN NUKTA +1172B ; Diacritic # Mn AHOM SIGN KILLER +11839..1183A ; Diacritic # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +119E0 ; Diacritic # Mn NANDINAGARI SIGN VIRAMA +11A34 ; Diacritic # Mn ZANABAZAR SQUARE SIGN VIRAMA +11A47 ; Diacritic # Mn ZANABAZAR SQUARE SUBJOINER +11A99 ; Diacritic # Mn SOYOMBO SUBJOINER +11C3F ; Diacritic # Mn BHAIKSUKI SIGN VIRAMA +11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA +11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA +11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA +16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2EC..1E2EF ; Diacritic # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK +1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA + +# Total code points: 873 + +# ================================================ + +00B7 ; Extender # Po MIDDLE DOT +02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON +0640 ; Extender # Lm ARABIC TATWEEL +07FA ; Extender # Lm NKO LAJANYALAN +0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK +0EC6 ; Extender # Lm LAO KO LA +180A ; Extender # Po MONGOLIAN NIRUGU +1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK +1C36 ; Extender # Mn LEPCHA SIGN RAN +1C7B ; Extender # Lm OL CHIKI RELAA +3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK +3031..3035 ; Extender # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +309D..309E ; Extender # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +A015 ; Extender # Lm YI SYLLABLE WU +A60C ; Extender # Lm VAI SYLLABLE LENGTHENER +A9CF ; Extender # Lm JAVANESE PANGRANGKEP +A9E6 ; Extender # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AADD ; Extender # Lm TAI VIET SYMBOL SAM +AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +1135D ; Extender # Lo GRANTHA SIGN PLUTA +115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 +11A98 ; Extender # Mn SOYOMBO GEMINATION MARK +16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM +16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; Extender # Lm OLD CHINESE ITERATION MARK +1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK + +# Total code points: 47 + +# ================================================ + +00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR +00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR +02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP +02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI +037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI +1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN +1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A770 ; Other_Lowercase # Lm MODIFIER LETTER US +A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK + +# Total code points: 189 + +# ================================================ + +2160..216F ; Other_Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +24B6..24CF ; Other_Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +1F130..1F149 ; Other_Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 120 + +# ================================================ + +FDD0..FDEF ; Noncharacter_Code_Point # Cn [32] .. +FFFE..FFFF ; Noncharacter_Code_Point # Cn [2] .. +1FFFE..1FFFF ; Noncharacter_Code_Point # Cn [2] .. +2FFFE..2FFFF ; Noncharacter_Code_Point # Cn [2] .. +3FFFE..3FFFF ; Noncharacter_Code_Point # Cn [2] .. +4FFFE..4FFFF ; Noncharacter_Code_Point # Cn [2] .. +5FFFE..5FFFF ; Noncharacter_Code_Point # Cn [2] .. +6FFFE..6FFFF ; Noncharacter_Code_Point # Cn [2] .. +7FFFE..7FFFF ; Noncharacter_Code_Point # Cn [2] .. +8FFFE..8FFFF ; Noncharacter_Code_Point # Cn [2] .. +9FFFE..9FFFF ; Noncharacter_Code_Point # Cn [2] .. +AFFFE..AFFFF ; Noncharacter_Code_Point # Cn [2] .. +BFFFE..BFFFF ; Noncharacter_Code_Point # Cn [2] .. +CFFFE..CFFFF ; Noncharacter_Code_Point # Cn [2] .. +DFFFE..DFFFF ; Noncharacter_Code_Point # Cn [2] .. +EFFFE..EFFFF ; Noncharacter_Code_Point # Cn [2] .. +FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] .. +10FFFE..10FFFF; Noncharacter_Code_Point # Cn [2] .. + +# Total code points: 66 + +# ================================================ + +09BE ; Other_Grapheme_Extend # Mc BENGALI VOWEL SIGN AA +09D7 ; Other_Grapheme_Extend # Mc BENGALI AU LENGTH MARK +0B3E ; Other_Grapheme_Extend # Mc ORIYA VOWEL SIGN AA +0B57 ; Other_Grapheme_Extend # Mc ORIYA AU LENGTH MARK +0BBE ; Other_Grapheme_Extend # Mc TAMIL VOWEL SIGN AA +0BD7 ; Other_Grapheme_Extend # Mc TAMIL AU LENGTH MARK +0CC2 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN UU +0CD5..0CD6 ; Other_Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0D3E ; Other_Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA +0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK +0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +1B35 ; Other_Grapheme_Extend # Mc BALINESE VOWEL SIGN TEDUNG +200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER +302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +1133E ; Other_Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA +11357 ; Other_Grapheme_Extend # Mc GRANTHA AU LENGTH MARK +114B0 ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN AA +114BD ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN SHORT O +115AF ; Other_Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA +1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM +1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 +E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG + +# Total code points: 126 + +# ================================================ + +2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW +2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID + +# Total code points: 10 + +# ================================================ + +2FF2..2FF3 ; IDS_Trinary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW + +# Total code points: 2 + +# ================================================ + +2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE + +# Total code points: 329 + +# ================================================ + +3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FEF ; Unified_Ideograph # Lo [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF +FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F +FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 +FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 +FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F +FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21 +FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24 +FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29 +20000..2A6D6 ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 + +# Total code points: 87887 + +# ================================================ + +034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER +115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +17B4..17B5 ; Other_Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +2065 ; Other_Default_Ignorable_Code_Point # Cn +3164 ; Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER +FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER +FFF0..FFF8 ; Other_Default_Ignorable_Code_Point # Cn [9] .. +E0000 ; Other_Default_Ignorable_Code_Point # Cn +E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] .. +E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] .. +E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] .. + +# Total code points: 3776 + +# ================================================ + +0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0673 ; Deprecated # Lo ARABIC LETTER ALEF WITH WAVY HAMZA BELOW +0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR +0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL +17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA +206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET +232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET +E0001 ; Deprecated # Cf LANGUAGE TAG + +# Total code points: 15 + +# ================================================ + +0069..006A ; Soft_Dotted # L& [2] LATIN SMALL LETTER I..LATIN SMALL LETTER J +012F ; Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK +0249 ; Soft_Dotted # L& LATIN SMALL LETTER J WITH STROKE +0268 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE +029D ; Soft_Dotted # L& LATIN SMALL LETTER J WITH CROSSED-TAIL +02B2 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J +03F3 ; Soft_Dotted # L& GREEK LETTER YOT +0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE +1D62 ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER I +1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK +1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE +1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL +1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW +1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW +2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I +2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J +2C7C ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER J +1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J +1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J +1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J +1D4BE..1D4BF ; Soft_Dotted # L& [2] MATHEMATICAL SCRIPT SMALL I..MATHEMATICAL SCRIPT SMALL J +1D4F2..1D4F3 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SCRIPT SMALL I..MATHEMATICAL BOLD SCRIPT SMALL J +1D526..1D527 ; Soft_Dotted # L& [2] MATHEMATICAL FRAKTUR SMALL I..MATHEMATICAL FRAKTUR SMALL J +1D55A..1D55B ; Soft_Dotted # L& [2] MATHEMATICAL DOUBLE-STRUCK SMALL I..MATHEMATICAL DOUBLE-STRUCK SMALL J +1D58E..1D58F ; Soft_Dotted # L& [2] MATHEMATICAL BOLD FRAKTUR SMALL I..MATHEMATICAL BOLD FRAKTUR SMALL J +1D5C2..1D5C3 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF SMALL I..MATHEMATICAL SANS-SERIF SMALL J +1D5F6..1D5F7 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD SMALL I..MATHEMATICAL SANS-SERIF BOLD SMALL J +1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J +1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J +1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J + +# Total code points: 46 + +# ================================================ + +0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI +0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +19B5..19B7 ; Logical_Order_Exception # Lo [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O +19BA ; Logical_Order_Exception # Lo NEW TAI LUE VOWEL SIGN AY +AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA +AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY + +# Total code points: 19 + +# ================================================ + +1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P +212E ; Other_ID_Start # So ESTIMATED SYMBOL +309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + +# Total code points: 6 + +# ================================================ + +00B7 ; Other_ID_Continue # Po MIDDLE DOT +0387 ; Other_ID_Continue # Po GREEK ANO TELEIA +1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE + +# Total code points: 12 + +# ================================================ + +0021 ; Sentence_Terminal # Po EXCLAMATION MARK +002E ; Sentence_Terminal # Po FULL STOP +003F ; Sentence_Terminal # Po QUESTION MARK +0589 ; Sentence_Terminal # Po ARMENIAN FULL STOP +061E..061F ; Sentence_Terminal # Po [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK +06D4 ; Sentence_Terminal # Po ARABIC FULL STOP +0700..0702 ; Sentence_Terminal # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP +07F9 ; Sentence_Terminal # Po NKO EXCLAMATION MARK +0837 ; Sentence_Terminal # Po SAMARITAN PUNCTUATION MELODIC QITSA +0839 ; Sentence_Terminal # Po SAMARITAN PUNCTUATION QITSA +083D..083E ; Sentence_Terminal # Po [2] SAMARITAN PUNCTUATION SOF MASHFAAT..SAMARITAN PUNCTUATION ANNAAU +0964..0965 ; Sentence_Terminal # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +104A..104B ; Sentence_Terminal # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1362 ; Sentence_Terminal # Po ETHIOPIC FULL STOP +1367..1368 ; Sentence_Terminal # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +166E ; Sentence_Terminal # Po CANADIAN SYLLABICS FULL STOP +1735..1736 ; Sentence_Terminal # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1803 ; Sentence_Terminal # Po MONGOLIAN FULL STOP +1809 ; Sentence_Terminal # Po MONGOLIAN MANCHU FULL STOP +1944..1945 ; Sentence_Terminal # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; Sentence_Terminal # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B5A..1B5B ; Sentence_Terminal # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5E..1B5F ; Sentence_Terminal # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN +1C3B..1C3C ; Sentence_Terminal # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL +1C7E..1C7F ; Sentence_Terminal # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK +2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP +3002 ; Sentence_Terminal # Po IDEOGRAPHIC FULL STOP +A4FF ; Sentence_Terminal # Po LISU PUNCTUATION FULL STOP +A60E..A60F ; Sentence_Terminal # Po [2] VAI FULL STOP..VAI QUESTION MARK +A6F3 ; Sentence_Terminal # Po BAMUM FULL STOP +A6F7 ; Sentence_Terminal # Po BAMUM QUESTION MARK +A876..A877 ; Sentence_Terminal # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Sentence_Terminal # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; Sentence_Terminal # Po KAYAH LI SIGN SHYA +A9C8..A9C9 ; Sentence_Terminal # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI +AA5D..AA5F ; Sentence_Terminal # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; Sentence_Terminal # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Sentence_Terminal # Po MEETEI MAYEK CHEIKHEI +FE52 ; Sentence_Terminal # Po SMALL FULL STOP +FE56..FE57 ; Sentence_Terminal # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK +FF01 ; Sentence_Terminal # Po FULLWIDTH EXCLAMATION MARK +FF0E ; Sentence_Terminal # Po FULLWIDTH FULL STOP +FF1F ; Sentence_Terminal # Po FULLWIDTH QUESTION MARK +FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP +10A56..10A57 ; Sentence_Terminal # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +10F55..10F59 ; Sentence_Terminal # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +11047..11048 ; Sentence_Terminal # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA +110BE..110C1 ; Sentence_Terminal # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Sentence_Terminal # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Sentence_Terminal # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; Sentence_Terminal # Po SHARADA SUTRA MARK +111DE..111DF ; Sentence_Terminal # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..11239 ; Sentence_Terminal # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA +1123B..1123C ; Sentence_Terminal # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK +112A9 ; Sentence_Terminal # Po MULTANI SECTION MARK +1144B..1144C ; Sentence_Terminal # Po [2] NEWA DANDA..NEWA DOUBLE DANDA +115C2..115C3 ; Sentence_Terminal # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA +115C9..115D7 ; Sentence_Terminal # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; Sentence_Terminal # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; Sentence_Terminal # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11A42..11A43 ; Sentence_Terminal # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; Sentence_Terminal # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11C41..11C42 ; Sentence_Terminal # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA +11EF7..11EF8 ; Sentence_Terminal # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +16A6E..16A6F ; Sentence_Terminal # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Sentence_Terminal # Po BASSA VAH FULL STOP +16B37..16B38 ; Sentence_Terminal # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB +16B44 ; Sentence_Terminal # Po PAHAWH HMONG SIGN XAUS +16E98 ; Sentence_Terminal # Po MEDEFAIDRIN FULL STOP +1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP + +# Total code points: 141 + +# ================================================ + +180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 259 + +# ================================================ + +0009..000D ; Pattern_White_Space # Cc [5] .. +0020 ; Pattern_White_Space # Zs SPACE +0085 ; Pattern_White_Space # Cc +200E..200F ; Pattern_White_Space # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2028 ; Pattern_White_Space # Zl LINE SEPARATOR +2029 ; Pattern_White_Space # Zp PARAGRAPH SEPARATOR + +# Total code points: 11 + +# ================================================ + +0021..0023 ; Pattern_Syntax # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Pattern_Syntax # Sc DOLLAR SIGN +0025..0027 ; Pattern_Syntax # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Pattern_Syntax # Ps LEFT PARENTHESIS +0029 ; Pattern_Syntax # Pe RIGHT PARENTHESIS +002A ; Pattern_Syntax # Po ASTERISK +002B ; Pattern_Syntax # Sm PLUS SIGN +002C ; Pattern_Syntax # Po COMMA +002D ; Pattern_Syntax # Pd HYPHEN-MINUS +002E..002F ; Pattern_Syntax # Po [2] FULL STOP..SOLIDUS +003A..003B ; Pattern_Syntax # Po [2] COLON..SEMICOLON +003C..003E ; Pattern_Syntax # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Pattern_Syntax # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET +005C ; Pattern_Syntax # Po REVERSE SOLIDUS +005D ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET +005E ; Pattern_Syntax # Sk CIRCUMFLEX ACCENT +0060 ; Pattern_Syntax # Sk GRAVE ACCENT +007B ; Pattern_Syntax # Ps LEFT CURLY BRACKET +007C ; Pattern_Syntax # Sm VERTICAL LINE +007D ; Pattern_Syntax # Pe RIGHT CURLY BRACKET +007E ; Pattern_Syntax # Sm TILDE +00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN +00A6 ; Pattern_Syntax # So BROKEN BAR +00A7 ; Pattern_Syntax # Po SECTION SIGN +00A9 ; Pattern_Syntax # So COPYRIGHT SIGN +00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Pattern_Syntax # Sm NOT SIGN +00AE ; Pattern_Syntax # So REGISTERED SIGN +00B0 ; Pattern_Syntax # So DEGREE SIGN +00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN +00B6 ; Pattern_Syntax # Po PILCROW SIGN +00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BF ; Pattern_Syntax # Po INVERTED QUESTION MARK +00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN +00F7 ; Pattern_Syntax # Sm DIVISION SIGN +2010..2015 ; Pattern_Syntax # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Pattern_Syntax # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Pattern_Syntax # Pi LEFT SINGLE QUOTATION MARK +2019 ; Pattern_Syntax # Pf RIGHT SINGLE QUOTATION MARK +201A ; Pattern_Syntax # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Pattern_Syntax # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Pattern_Syntax # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Pattern_Syntax # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Pattern_Syntax # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Pattern_Syntax # Po [8] DAGGER..HYPHENATION POINT +2030..2038 ; Pattern_Syntax # Po [9] PER MILLE SIGN..CARET +2039 ; Pattern_Syntax # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Pattern_Syntax # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Pattern_Syntax # Po [4] REFERENCE MARK..OVERLINE +2041..2043 ; Pattern_Syntax # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Pattern_Syntax # Sm FRACTION SLASH +2045 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Pattern_Syntax # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Pattern_Syntax # Sm COMMERCIAL MINUS SIGN +2053 ; Pattern_Syntax # Po SWUNG DASH +2055..205E ; Pattern_Syntax # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +2190..2194 ; Pattern_Syntax # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Pattern_Syntax # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Pattern_Syntax # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Pattern_Syntax # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Pattern_Syntax # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Pattern_Syntax # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Pattern_Syntax # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Pattern_Syntax # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Pattern_Syntax # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Pattern_Syntax # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Pattern_Syntax # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Pattern_Syntax # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Pattern_Syntax # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Pattern_Syntax # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Pattern_Syntax # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Pattern_Syntax # So DOWNWARDS DOUBLE ARROW +21D4 ; Pattern_Syntax # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Pattern_Syntax # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Pattern_Syntax # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Pattern_Syntax # So [8] DIAMETER SIGN..WAVY LINE +2308 ; Pattern_Syntax # Ps LEFT CEILING +2309 ; Pattern_Syntax # Pe RIGHT CEILING +230A ; Pattern_Syntax # Ps LEFT FLOOR +230B ; Pattern_Syntax # Pe RIGHT FLOOR +230C..231F ; Pattern_Syntax # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Pattern_Syntax # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Pattern_Syntax # So [7] FROWN..KEYBOARD +2329 ; Pattern_Syntax # Ps LEFT-POINTING ANGLE BRACKET +232A ; Pattern_Syntax # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Pattern_Syntax # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Pattern_Syntax # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Pattern_Syntax # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..2426 ; Pattern_Syntax # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO +2427..243F ; Pattern_Syntax # Cn [25] .. +2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +244B..245F ; Pattern_Syntax # Cn [21] .. +2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Pattern_Syntax # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; Pattern_Syntax # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Pattern_Syntax # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Pattern_Syntax # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN +2670..2767 ; Pattern_Syntax # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET +2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Pattern_Syntax # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Pattern_Syntax # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Pattern_Syntax # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Pattern_Syntax # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Pattern_Syntax # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Pattern_Syntax # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Pattern_Syntax # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Pattern_Syntax # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Pattern_Syntax # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2794..27BF ; Pattern_Syntax # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Pattern_Syntax # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Pattern_Syntax # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Pattern_Syntax # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Pattern_Syntax # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Pattern_Syntax # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Pattern_Syntax # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; Pattern_Syntax # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..2982 ; Pattern_Syntax # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Pattern_Syntax # Ps LEFT WHITE CURLY BRACKET +2984 ; Pattern_Syntax # Pe RIGHT WHITE CURLY BRACKET +2985 ; Pattern_Syntax # Ps LEFT WHITE PARENTHESIS +2986 ; Pattern_Syntax # Pe RIGHT WHITE PARENTHESIS +2987 ; Pattern_Syntax # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Pattern_Syntax # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Pattern_Syntax # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Pattern_Syntax # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Pattern_Syntax # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Pattern_Syntax # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Pattern_Syntax # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Pattern_Syntax # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Pattern_Syntax # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Pattern_Syntax # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Pattern_Syntax # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Pattern_Syntax # Ps LEFT WIGGLY FENCE +29D9 ; Pattern_Syntax # Pe RIGHT WIGGLY FENCE +29DA ; Pattern_Syntax # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Pattern_Syntax # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Pattern_Syntax # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Pattern_Syntax # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Pattern_Syntax # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Pattern_Syntax # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Pattern_Syntax # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Pattern_Syntax # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B74..2B75 ; Pattern_Syntax # Cn [2] .. +2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B96..2B97 ; Pattern_Syntax # Cn [2] .. +2B98..2BFF ; Pattern_Syntax # So [104] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..HELLSCHREIBER PAUSE SYMBOL +2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Pattern_Syntax # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Pattern_Syntax # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Pattern_Syntax # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Pattern_Syntax # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Pattern_Syntax # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Pattern_Syntax # Po RAISED SQUARE +2E0C ; Pattern_Syntax # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Pattern_Syntax # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Pattern_Syntax # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Pattern_Syntax # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Pattern_Syntax # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Pattern_Syntax # Pd HYPHEN WITH DIAERESIS +2E1B ; Pattern_Syntax # Po TILDE WITH RING ABOVE +2E1C ; Pattern_Syntax # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Pattern_Syntax # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Pattern_Syntax # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Pattern_Syntax # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Pattern_Syntax # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Pattern_Syntax # Ps TOP LEFT HALF BRACKET +2E23 ; Pattern_Syntax # Pe TOP RIGHT HALF BRACKET +2E24 ; Pattern_Syntax # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Pattern_Syntax # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Pattern_Syntax # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Pattern_Syntax # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Pattern_Syntax # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; Pattern_Syntax # Lm VERTICAL TILDE +2E30..2E39 ; Pattern_Syntax # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Pattern_Syntax # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; Pattern_Syntax # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN +2E41 ; Pattern_Syntax # Po REVERSED COMMA +2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; Pattern_Syntax # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E7F ; Pattern_Syntax # Cn [48] .. +3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET +3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET +300A ; Pattern_Syntax # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Pattern_Syntax # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Pattern_Syntax # Ps LEFT CORNER BRACKET +300D ; Pattern_Syntax # Pe RIGHT CORNER BRACKET +300E ; Pattern_Syntax # Ps LEFT WHITE CORNER BRACKET +300F ; Pattern_Syntax # Pe RIGHT WHITE CORNER BRACKET +3010 ; Pattern_Syntax # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Pattern_Syntax # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Pattern_Syntax # So [2] POSTAL MARK..GETA MARK +3014 ; Pattern_Syntax # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Pattern_Syntax # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Pattern_Syntax # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Pattern_Syntax # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Pattern_Syntax # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Pattern_Syntax # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Pattern_Syntax # Ps LEFT WHITE SQUARE BRACKET +301B ; Pattern_Syntax # Pe RIGHT WHITE SQUARE BRACKET +301C ; Pattern_Syntax # Pd WAVE DASH +301D ; Pattern_Syntax # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Pattern_Syntax # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Pattern_Syntax # So POSTAL MARK FACE +3030 ; Pattern_Syntax # Pd WAVY DASH +FD3E ; Pattern_Syntax # Pe ORNATE LEFT PARENTHESIS +FD3F ; Pattern_Syntax # Ps ORNATE RIGHT PARENTHESIS +FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT + +# Total code points: 2760 + +# ================================================ + +0600..0605 ; Prepended_Concatenation_Mark # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +06DD ; Prepended_Concatenation_Mark # Cf ARABIC END OF AYAH +070F ; Prepended_Concatenation_Mark # Cf SYRIAC ABBREVIATION MARK +08E2 ; Prepended_Concatenation_Mark # Cf ARABIC DISPUTED END OF AYAH +110BD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN +110CD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN ABOVE + +# Total code points: 11 + +# ================================================ + +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + +# EOF diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj b/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj index 20c33e4..2e2bfce 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj @@ -240,6 +240,7 @@ + @@ -286,6 +287,7 @@ + @@ -319,9 +321,23 @@ + + System\Text\Unicode\UnicodeData.11.0.txt + UnicodeData.11.0.txt + + + System\Text\Unicode\CaseFolding-12.1.0.txt + CaseFolding-12.1.0.txt + + + System\Text\Unicode\PropList-12.1.0.txt + PropList-12.1.0.txt + + + - \ No newline at end of file + diff --git a/src/libraries/System.Runtime/tests/System/CharTests.cs b/src/libraries/System.Runtime/tests/System/CharTests.cs index 46ef814..b7a6a47 100644 --- a/src/libraries/System.Runtime/tests/System/CharTests.cs +++ b/src/libraries/System.Runtime/tests/System/CharTests.cs @@ -9,7 +9,7 @@ using Xunit; namespace System.Tests { - public class CharTests + public partial class CharTests { [Theory] [InlineData('h', 'h', 0)] diff --git a/src/libraries/System.Runtime/tests/System/CharTests.netcoreapp.cs b/src/libraries/System.Runtime/tests/System/CharTests.netcoreapp.cs new file mode 100644 index 0000000..5af4344 --- /dev/null +++ b/src/libraries/System.Runtime/tests/System/CharTests.netcoreapp.cs @@ -0,0 +1,168 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Globalization; +using System.Text.Unicode.Tests; +using Xunit; +using Xunit.Sdk; + +namespace System.Tests +{ + public partial class CharTests + { + [OuterLoop] + [Fact] + public static void GetUnicodeCategory_Char_AllInputs() + { + // This tests calls char.GetUnicodeCategory for every possible input, ensuring that + // the runtime agrees with the data in the core Unicode files. + + for (uint i = 0; i <= char.MaxValue; i++) + { + UnicodeCategory expected; + + // The code points in the switch block below must be special-cased + // because they switched categories between versions of the Unicode + // specification. For compatibility reasons Char keeps its own copy + // of the categories for the first 256 code points, as it's locked + // to an earlier version of the standard. For an example of a code + // point that switched categories, see the discussion on U+00AD + // SOFT HYPHEN at https://www.unicode.org/versions/Unicode4.0.0/. + + switch (i) + { + case '\u00a7': + case '\u00b6': + expected = UnicodeCategory.OtherSymbol; + break; + + case '\u00aa': + case '\u00ba': + expected = UnicodeCategory.LowercaseLetter; + break; + + case '\u00ad': + expected = UnicodeCategory.DashPunctuation; + break; + + default: + expected = UnicodeData.GetUnicodeCategory(i); + break; + } + + if (expected != char.GetUnicodeCategory((char)i)) + { + // We'll build up the exception message ourselves so the dev knows what code point failed. + throw new AssertActualExpectedException( + expected: expected, + actual: char.GetUnicodeCategory((char)i), + userMessage: FormattableString.Invariant($@"char.GetUnicodeCategory('\u{i:X4}') returned wrong value.")); + } + } + } + + [OuterLoop] + [Fact] + public static void IsLetter_Char_AllInputs() + { + // This tests calls char.IsLetter for every possible input, ensuring that + // the runtime agrees with the data in the core Unicode files. + + for (uint i = 0; i <= char.MaxValue; i++) + { + if (UnicodeData.IsLetter((char)i) != char.IsLetter((char)i)) + { + // We'll build up the exception message ourselves so the dev knows what code point failed. + throw new AssertActualExpectedException( + expected: UnicodeData.IsLetter((char)i), + actual: char.IsLetter((char)i), + userMessage: FormattableString.Invariant($@"char.IsLetter('\u{i:X4}') returned wrong value.")); + } + } + } + + [OuterLoop] + [Fact] + public static void IsLower_Char_AllInputs() + { + // This tests calls char.IsLower for every possible input, ensuring that + // the runtime agrees with the data in the core Unicode files. + + for (uint i = 0; i <= char.MaxValue; i++) + { + bool expected; + + switch (i) + { + case '\u00AA': // FEMININE ORDINAL INDICATOR + case '\u00BA': // MASCULINE ORDINAL INDICATOR + + // In Unicode 6.1 the code points U+00AA and U+00BA were reassigned + // from category Ll to category Lo. However, for compatibility reasons, + // Char uses the older version of the Unicode standard for code points + // in the range U+0000..U+00FF. So we'll special-case these here. + // More info: https://www.unicode.org/review/pri181/ + + expected = true; + break; + + default: + expected = UnicodeData.GetUnicodeCategory((char)i) == UnicodeCategory.LowercaseLetter; + break; + } + + if (expected != char.IsLower((char)i)) + { + // We'll build up the exception message ourselves so the dev knows what code point failed. + throw new AssertActualExpectedException( + expected: expected, + actual: char.IsLower((char)i), + userMessage: FormattableString.Invariant($@"char.IsLower('\u{i:X4}') returned wrong value.")); + } + } + } + + [OuterLoop] + [Fact] + public static void IsUpper_Char_AllInputs() + { + // This tests calls char.IsUpper for every possible input, ensuring that + // the runtime agrees with the data in the core Unicode files. + + for (uint i = 0; i <= char.MaxValue; i++) + { + bool expected = UnicodeData.GetUnicodeCategory((char)i) == UnicodeCategory.UppercaseLetter; + + if (expected != char.IsUpper((char)i)) + { + // We'll build up the exception message ourselves so the dev knows what code point failed. + throw new AssertActualExpectedException( + expected: expected, + actual: char.IsUpper((char)i), + userMessage: FormattableString.Invariant($@"char.IsUpper('\u{i:X4}') returned wrong value.")); + } + } + } + + [OuterLoop] + [Fact] + public static void IsWhiteSpace_Char_AllInputs() + { + // This tests calls char.IsWhiteSpace for every possible input, ensuring that + // the runtime agrees with the data in the core Unicode files. + + for (uint i = 0; i <= char.MaxValue; i++) + { + if (UnicodeData.IsWhiteSpace(i) != char.IsWhiteSpace((char)i)) + { + // We'll build up the exception message ourselves so the dev knows what code point failed. + throw new AssertActualExpectedException( + expected: UnicodeData.IsWhiteSpace(i), + actual: char.IsWhiteSpace((char)i), + userMessage: FormattableString.Invariant($@"char.IsWhiteSpace('\u{i:X4}') returned wrong value.")); + } + } + } + } +} diff --git a/src/libraries/System.Runtime/tests/System/Text/RuneTests.TestData.netcoreapp.cs b/src/libraries/System.Runtime/tests/System/Text/RuneTests.TestData.netcoreapp.cs index b006385..0143436 100644 --- a/src/libraries/System.Runtime/tests/System/Text/RuneTests.TestData.netcoreapp.cs +++ b/src/libraries/System.Runtime/tests/System/Text/RuneTests.TestData.netcoreapp.cs @@ -10,6 +10,19 @@ namespace System.Text.Tests { public static partial class RuneTests { + public static IEnumerable AllRunes() + { + for (uint i = 0; i < 0xD800; i++) + { + yield return new Rune(i); + } + + for (uint i = 0xE000; i <= 0x10FFFF; i++) + { + yield return new Rune(i); + } + } + public static IEnumerable GeneralTestData_BmpCodePoints_NoSurrogates() { yield return new object[] diff --git a/src/libraries/System.Runtime/tests/System/Text/RuneTests.netcoreapp.cs b/src/libraries/System.Runtime/tests/System/Text/RuneTests.netcoreapp.cs index 942c715..28e097f 100644 --- a/src/libraries/System.Runtime/tests/System/Text/RuneTests.netcoreapp.cs +++ b/src/libraries/System.Runtime/tests/System/Text/RuneTests.netcoreapp.cs @@ -4,7 +4,9 @@ using System.Buffers; using System.Globalization; +using System.Text.Unicode.Tests; using Xunit; +using Xunit.Sdk; namespace System.Text.Tests { @@ -339,6 +341,26 @@ namespace System.Text.Tests Assert.Equal(testData.UnicodeCategory, Rune.GetUnicodeCategory(testData.ScalarValue)); } + [OuterLoop] + [Fact] + public static void GetUnicodeCategory_AllInputs() + { + // This tests calls Rune.GetUnicodeCategory for every possible input, ensuring that + // the runtime agrees with the data in the core Unicode files. + + foreach (Rune rune in AllRunes()) + { + if (UnicodeData.GetUnicodeCategory((uint)rune.Value) != Rune.GetUnicodeCategory(rune)) + { + // We'll build up the exception message ourselves so the dev knows what code point failed. + throw new AssertActualExpectedException( + expected: UnicodeData.GetUnicodeCategory((uint)rune.Value), + actual: Rune.GetUnicodeCategory(rune), + userMessage: FormattableString.Invariant($@"Rune.GetUnicodeCategory(U+{rune.Value:X4}) returned wrong value.")); + } + } + } + [Theory] [MemberData(nameof(UnicodeInfoTestData_Latin1AndSelectOthers))] public static void IsControl(UnicodeInfoTestData testData) @@ -424,6 +446,19 @@ namespace System.Text.Tests Assert.Equal(testData.IsWhiteSpace, Rune.IsWhiteSpace(testData.ScalarValue)); } + [OuterLoop] + [Fact] + public static void IsWhiteSpace_AllInputs() + { + // This tests calls Rune.IsWhiteSpace for every possible input, ensuring that + // the runtime agrees with the data in the core Unicode files. + + foreach (Rune rune in AllRunes()) + { + Assert.Equal(UnicodeData.IsWhiteSpace((uint)rune.Value), Rune.IsWhiteSpace(rune)); + } + } + [Theory] [InlineData(0, 0)] [InlineData(0x80, 0x80)] diff --git a/src/libraries/System.Runtime/tests/System/Text/Unicode/UnicodeData.netcoreapp.cs b/src/libraries/System.Runtime/tests/System/Text/Unicode/UnicodeData.netcoreapp.cs new file mode 100644 index 0000000..df9201c --- /dev/null +++ b/src/libraries/System.Runtime/tests/System/Text/Unicode/UnicodeData.netcoreapp.cs @@ -0,0 +1,297 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Text.RegularExpressions; +using Xunit; +using Xunit.Sdk; + +namespace System.Text.Unicode.Tests +{ + /* + * This file contains helpers for parsing the Unicode data files, which we can then use to test + * our Framework's implementations of various character & text processing routines. + */ + + public static class UnicodeData + { + private const string UnicodeDataFilename = "UnicodeData.11.0.txt"; + private const string CaseFoldingFilename = "CaseFolding-12.1.0.txt"; + private const string PropListFilename = "PropList-12.1.0.txt"; + + private static readonly Dictionary _categoryCodeMap = new Dictionary() + { + { "Lu", UnicodeCategory.UppercaseLetter }, + { "Ll", UnicodeCategory.LowercaseLetter }, + { "Lt", UnicodeCategory.TitlecaseLetter }, + { "Lm", UnicodeCategory.ModifierLetter }, + { "Lo", UnicodeCategory.OtherLetter }, + { "Mn", UnicodeCategory.NonSpacingMark }, + { "Mc", UnicodeCategory.SpacingCombiningMark }, + { "Me", UnicodeCategory.EnclosingMark }, + { "Nd", UnicodeCategory.DecimalDigitNumber }, + { "Nl", UnicodeCategory.LetterNumber }, + { "No", UnicodeCategory.OtherNumber }, + { "Pc", UnicodeCategory.ConnectorPunctuation }, + { "Pd", UnicodeCategory.DashPunctuation }, + { "Ps", UnicodeCategory.OpenPunctuation }, + { "Pe", UnicodeCategory.ClosePunctuation }, + { "Pi", UnicodeCategory.InitialQuotePunctuation }, + { "Pf", UnicodeCategory.FinalQuotePunctuation }, + { "Po", UnicodeCategory.OtherPunctuation }, + { "Sm", UnicodeCategory.MathSymbol }, + { "Sc", UnicodeCategory.CurrencySymbol }, + { "Sk", UnicodeCategory.ModifierSymbol }, + { "So", UnicodeCategory.OtherSymbol }, + { "Zs", UnicodeCategory.SpaceSeparator }, + { "Zl", UnicodeCategory.LineSeparator }, + { "Zp", UnicodeCategory.ParagraphSeparator }, + { "Cc", UnicodeCategory.Control }, + { "Cf", UnicodeCategory.Format }, + { "Cs", UnicodeCategory.Surrogate }, + { "Co", UnicodeCategory.PrivateUse }, + }; + + private static readonly Dictionary _categoryMap = new Dictionary(); + private static readonly Dictionary _propertyMap = new Dictionary(); + private static readonly Dictionary _simpleCaseFoldMap = new Dictionary(); + private static readonly Dictionary _simpleLowerCaseMap = new Dictionary(); + private static readonly Dictionary _simpleTitleCaseMap = new Dictionary(); + private static readonly Dictionary _simpleUpperCaseMap = new Dictionary(); + + static UnicodeData() + { + static IEnumerable ReadAllLines(string resourceName) + { + using (Stream stream = typeof(UnicodeData).Assembly.GetManifestResourceStream(resourceName)) + { + if (stream is null) + { + throw new XunitException($"Could not locate resource stream {resourceName} in the assembly."); + } + + List allLines = new List(); + + using (StreamReader reader = new StreamReader(stream)) + { + string nextLine; + while ((nextLine = reader.ReadLine()) != null) + { + yield return nextLine; + } + } + } + } + + // First, read the basic category, bidi data, and simple upper / lower / title mappings from UnicodeData.txt + // File format is documented at https://www.unicode.org/reports/tr44/#UnicodeData.txt + + string nameOfCurrentRange = null; + uint startOfCurrentRange = 0; + + foreach (string line in ReadAllLines(UnicodeDataFilename)) + { + string[] splitLine = line.Split(';'); + Assert.Equal(15, splitLine.Length); // expected 15 segments resulting from the split + + uint currentCodePoint = uint.Parse(splitLine[0], NumberStyles.HexNumber, CultureInfo.InvariantCulture); + string codePointName = splitLine[1]; + string category = splitLine[2]; + + // Is this the start of a range? + // If so, save this data and keep iterating. + + if (codePointName.EndsWith(", First>", StringComparison.Ordinal)) + { + startOfCurrentRange = currentCodePoint; + nameOfCurrentRange = codePointName[..^(", First>".Length)]; + continue; + } + + // If we didn't see a <..., First> range immediately before + // this, assume it's a range of length 1. + + if (codePointName != nameOfCurrentRange + ", Last>") + { + startOfCurrentRange = currentCodePoint; + } + + uint.TryParse(splitLine[12], NumberStyles.HexNumber, CultureInfo.InvariantCulture, out uint simpleUpperMapping); + uint.TryParse(splitLine[13], NumberStyles.HexNumber, CultureInfo.InvariantCulture, out uint simpleLowerMapping); + uint.TryParse(splitLine[14], NumberStyles.HexNumber, CultureInfo.InvariantCulture, out uint simpleTitleMapping); + + for (uint i = startOfCurrentRange; i <= currentCodePoint; i++) + { + _categoryMap[i] = _categoryCodeMap[category]; + + if (simpleLowerMapping != 0) + { + _simpleLowerCaseMap[i] = simpleLowerMapping; + } + + if (simpleUpperMapping != 0) + { + _simpleUpperCaseMap[i] = simpleUpperMapping; + } + + if (simpleTitleMapping != 0) + { + _simpleTitleCaseMap[i] = simpleTitleMapping; + } + } + } + + // Then, read the property map from PropList.txt. + // We're looking for lines of the form "XXXX[..YYYY] ; # " + + Regex propListRegex = new Regex(@"^\s*(?[0-9A-F]{4,})(\.\.(?[0-9A-F]{4,}))?\s*;\s*(?\w+)\s*#", RegexOptions.IgnoreCase); + + foreach (string line in ReadAllLines(PropListFilename)) + { + Match match = propListRegex.Match(line); + if (!match.Success) + { + continue; // line was blank or was comment-only + } + + uint firstCodePoint = uint.Parse(match.Groups["firstCodePoint"].Value, NumberStyles.HexNumber, CultureInfo.InvariantCulture); + if (!uint.TryParse(match.Groups["lastCodePoint"].Value, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out uint lastCodePoint)) + { + lastCodePoint = firstCodePoint; // If not "XXXX..YYYY", assume "XXXX..XXXX" + } + CharProperty property = Enum.Parse(match.Groups["propName"].Value); + + for (uint i = firstCodePoint; i <= lastCodePoint; i++) + { + _propertyMap.TryGetValue(i, out CharProperty existingValue); + _propertyMap[i] = existingValue | property; // remember, this is flags + } + } + + // Finally, read the case fold map from CaseFolding.txt. + // We're looking for lines of the form "; ; ; # ", where is 'C' or 'S' + + Regex caseFoldRegex = new Regex(@"^\s*(?[0-9A-F]{4,}); [CS]; (?[0-9A-F]{4,});", RegexOptions.IgnoreCase); + + foreach (string line in ReadAllLines(CaseFoldingFilename)) + { + Match match = caseFoldRegex.Match(line); + if (!match.Success) + { + continue; // line was blank, comment-only, or not simple / common + } + + uint fromCodePoint = uint.Parse(match.Groups["fromCodePoint"].Value, NumberStyles.HexNumber, CultureInfo.InvariantCulture); + uint toCodePoint = uint.Parse(match.Groups["toCodePoint"].Value, NumberStyles.HexNumber, CultureInfo.InvariantCulture); + + _simpleCaseFoldMap[fromCodePoint] = toCodePoint; + } + } + + public static UnicodeCategory GetUnicodeCategory(uint codePoint) + { + Assert.True(codePoint <= 0x10FFFF, "Invalid code point provided."); + + return _categoryMap.TryGetValue(codePoint, out UnicodeCategory category) ? category : UnicodeCategory.OtherNotAssigned; + } + + public static bool IsLetter(uint codePoint) + { + switch (GetUnicodeCategory(codePoint)) + { + case UnicodeCategory.UppercaseLetter: + case UnicodeCategory.LowercaseLetter: + case UnicodeCategory.TitlecaseLetter: + case UnicodeCategory.ModifierLetter: + case UnicodeCategory.OtherLetter: + return true; + + default: + return false; + } + } + + // Returns true iff code point is listed at https://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:whitespace:] + public static bool IsWhiteSpace(uint codePoint) + { + Assert.True(codePoint <= 0x10FFFF, "Invalid code point provided."); + + _propertyMap.TryGetValue(codePoint, out CharProperty property); + return property.HasFlag(CharProperty.White_Space); + } + + public static uint SimpleFoldCaseMap(uint codePoint) + { + Assert.True(codePoint <= 0x10FFFF, "Invalid code point provided."); + + // If an entry doesn't exist, the value maps to itself. + return _simpleCaseFoldMap.TryGetValue(codePoint, out uint value) ? value : codePoint; + } + + public static uint SimpleLowerCaseMap(uint codePoint) + { + Assert.True(codePoint <= 0x10FFFF, "Invalid code point provided."); + + // If an entry doesn't exist, the value maps to itself. + return _simpleLowerCaseMap.TryGetValue(codePoint, out uint value) ? value : codePoint; + } + + public static uint SimpleTitleCaseMap(uint codePoint) + { + Assert.True(codePoint <= 0x10FFFF, "Invalid code point provided."); + + // If an entry doesn't exist, the value maps to itself. + return _simpleTitleCaseMap.TryGetValue(codePoint, out uint value) ? value : codePoint; + } + + public static uint SimpleUpperCaseMap(uint codePoint) + { + Assert.True(codePoint <= 0x10FFFF, "Invalid code point provided."); + + // If an entry doesn't exist, the value maps to itself. + return _simpleUpperCaseMap.TryGetValue(codePoint, out uint value) ? value : codePoint; + } + + [Flags] + private enum CharProperty : ulong + { + ASCII_Hex_Digit = 1ul << 0, + Bidi_Control = 1ul << 1, + Dash = 1ul << 2, + Deprecated = 1ul << 3, + Diacritic = 1ul << 4, + Extender = 1ul << 5, + Hex_Digit = 1ul << 6, + Hyphen = 1ul << 7, + Ideographic = 1ul << 8, + IDS_Binary_Operator = 1ul << 9, + IDS_Trinary_Operator = 1ul << 10, + Join_Control = 1ul << 11, + Logical_Order_Exception = 1ul << 12, + Noncharacter_Code_Point = 1ul << 13, + Other_Alphabetic = 1ul << 14, + Other_Default_Ignorable_Code_Point = 1ul << 15, + Other_Grapheme_Extend = 1ul << 16, + Other_ID_Continue = 1ul << 17, + Other_ID_Start = 1ul << 18, + Other_Lowercase = 1ul << 19, + Other_Math = 1ul << 20, + Other_Uppercase = 1ul << 21, + Pattern_Syntax = 1ul << 22, + Pattern_White_Space = 1ul << 23, + Prepended_Concatenation_Mark = 1ul << 24, + Quotation_Mark = 1ul << 25, + Radical = 1ul << 26, + Regional_Indicator = 1ul << 27, + Sentence_Terminal = 1ul << 28, + Soft_Dotted = 1ul << 29, + Terminal_Punctuation = 1ul << 30, + Unified_Ideograph = 1ul << 31, + Variation_Selector = 1ul << 32, + White_Space = 1ul << 33, + } + } +}