($General{$name} ||= Table->New)->$op($code, $name);
# 005F: SPACING UNDERSCORE
- $Cat{Word}->$op($code) if $cat =~ /^[LMN]/ || $code == 0x005F;
+ $Cat{Word}->$op($code) if $cat =~ /^[LMN]|Pc/;
$Cat{Alnum}->$op($code) if $cat =~ /^[LM]|Nd/;
$Cat{Alpha}->$op($code) if $cat =~ /^[LM]/;
-
-
- $Cat{Space}->$op($code) if $cat =~ /^Z/
+ my $isspace =
+ ($cat =~ /Zs|Zl|Zp/ &&
+ $code != 0x200B) # 200B is ZWSP which is for line break control
+ # and therefore it is not part of "space" even though it is "Zs".
|| $code == 0x0009 # 0009: HORIZONTAL TAB
|| $code == 0x000A # 000A: LINE FEED
|| $code == 0x000B # 000B: VERTICAL TAB
|| $code == 0x000C # 000C: FORM FEED
|| $code == 0x000D # 000D: CARRIAGE RETURN
- || $code == 0x0085; # 0085: NEL
+ || $code == 0x0085 # 0085: NEL
+
+ ;
+ $Cat{Space}->$op($code) if $isspace;
- $Cat{SpacePerl}->$op($code) if $cat =~ /^Z/
- || $code == 0x0009 # 0009: HORIZONTAL TAB
- || $code == 0x000A # 000A: LINE FEED
- || $code == 0x000C # 000C: FORM FEED
- || $code == 0x000D # 000D: CARRIAGE RETURN
- || $code == 0x0085 # 0085: <NEXT LINE>
- || $code == 0x2028 # 2028: LINE SEPARATOR
- || $code == 0x2029;# 2029: PARAGRAPH SEP.
+ $Cat{SpacePerl}->$op($code) if $isspace
+ && $code != 0x000B; # Backward compat.
- $Cat{Blank}->$op($code) if $cat eq "Zs"
- || $code == 0x0009; # 0009: HORIZONTAL TAB
+ $Cat{Blank}->$op($code) if $isspace
+ && !($code == 0x000A ||
+ $code == 0x000B ||
+ $code == 0x000C ||
+ $code == 0x000D ||
+ $code == 0x0085 ||
+ $cat =~ /^Z[lp]/);
$Cat{Digit}->$op($code) if $cat eq "Nd";
$Cat{Upper}->$op($code) if $cat eq "Lu";
$Cat{Title}->$op($code) if $cat eq "Lt";
$Cat{ASCII}->$op($code) if $code <= 0x007F;
$Cat{Cntrl}->$op($code) if $cat =~ /^C/;
- $Cat{Graph}->$op($code) if $cat !~ /Zs|Cc|Cs|Cn/;
- $Cat{Print}->$op($code) if $cat =~ /^[LMNPS]/
- || $cat eq "Zs";
+ my $isgraph = !$isspace && $cat !~ /Cc|Cs|Cn/;
+ $Cat{Graph}->$op($code) if $isgraph;
+ $Cat{Print}->$op($code) if $isgraph || $isspace;
$Cat{Punct}->$op($code) if $cat =~ /^P/;
$Cat{XDigit}->$op($code) if ($code >= 0x30 && $code <= 0x39) ## 0..9