return "( " . join( " || ", @ranges ) . " )";
}
- # If the input set has certain characteristics, we can optimize tests
- # for it. This doesn't apply if returning the code point, as we want
- # each element of the set individually. The code above is for this
- # simpler case.
-
- return 1 if @$cond == 256; # If all bytes match, is trivially true
-
- if (@ranges > 1) {
- # See if the entire set shares optimizable characterstics, and if
- # so, return the optimization. We delay checking for this on sets
- # with just a single range, as there may be better optimizations
- # available in that case.
- my ($mask, $base) = calculate_mask(@$cond);
- if (defined $mask && defined $base) {
- return sprintf "( ( $test & $self->{val_fmt} ) == $self->{val_fmt} )", $mask, $base;
- }
+ # If the input set has certain characteristics, we can optimize tests
+ # for it. This doesn't apply if returning the code point, as we want
+ # each element of the set individually. The code above is for this
+ # simpler case.
+
+ return 1 if @$cond == 256; # If all bytes match, is trivially true
+
+ if (@ranges > 1) {
+ # See if the entire set shares optimizable characterstics, and if so,
+ # return the optimization. We delay checking for this on sets with
+ # just a single range, as there may be better optimizations available
+ # in that case.
+ my ($mask, $base) = calculate_mask(@$cond);
+ if (defined $mask && defined $base) {
+ return sprintf "( ( $test & $self->{val_fmt} ) == $self->{val_fmt} )", $mask, $base;
}
+ }
- # Here, there was no entire-class optimization. Look at each range.
- for (my $i = 0; $i < @ranges; $i++) {
- if (! ref $ranges[$i]) { # Trivial case: no range
- $ranges[$i] = sprintf "$self->{val_fmt} == $test", $ranges[$i];
- }
- elsif ($ranges[$i]->[0] == $ranges[$i]->[1]) {
- $ranges[$i] = # Trivial case: single element range
- sprintf "$self->{val_fmt} == $test", $ranges[$i]->[0];
- }
- else {
- my $output = "";
-
- # Well-formed UTF-8 continuation bytes on ascii platforms must
- # be in the range 0x80 .. 0xBF. If we know that the input is
- # well-formed (indicated by not trying to be 'safe'), we can
- # omit tests that verify that the input is within either of
- # these bounds. (No legal UTF-8 character can begin with
- # anything in this range, so we don't have to worry about this
- # being a continuation byte or not.)
- if (ASCII_PLATFORM
- && ! $opts_ref->{safe}
- && $opts_ref->{type} =~ / ^ (?: utf8 | high ) $ /xi)
- {
- my $lower_limit_is_80 = ($ranges[$i]->[0] == 0x80);
- my $upper_limit_is_BF = ($ranges[$i]->[1] == 0xBF);
-
- # If the range is the entire legal range, it matches any
- # legal byte, so we can omit both tests. (This should
- # happen only if the number of ranges is 1.)
- if ($lower_limit_is_80 && $upper_limit_is_BF) {
- return 1;
- }
- elsif ($lower_limit_is_80) { # Just use the upper limit test
- $output = sprintf("( $test <= $self->{val_fmt} )",
- $ranges[$i]->[1]);
- }
- elsif ($upper_limit_is_BF) { # Just use the lower limit test
- $output = sprintf("( $test >= $self->{val_fmt} )",
- $ranges[$i]->[0]);
- }
+ # Here, there was no entire-class optimization. Look at each range.
+ for (my $i = 0; $i < @ranges; $i++) {
+ if (! ref $ranges[$i]) { # Trivial case: no range
+ $ranges[$i] = sprintf "$self->{val_fmt} == $test", $ranges[$i];
+ }
+ elsif ($ranges[$i]->[0] == $ranges[$i]->[1]) {
+ $ranges[$i] = # Trivial case: single element range
+ sprintf "$self->{val_fmt} == $test", $ranges[$i]->[0];
+ }
+ else {
+ my $output = "";
+
+ # Well-formed UTF-8 continuation bytes on ascii platforms must be
+ # in the range 0x80 .. 0xBF. If we know that the input is
+ # well-formed (indicated by not trying to be 'safe'), we can omit
+ # tests that verify that the input is within either of these
+ # bounds. (No legal UTF-8 character can begin with anything in
+ # this range, so we don't have to worry about this being a
+ # continuation byte or not.)
+ if (ASCII_PLATFORM
+ && ! $opts_ref->{safe}
+ && $opts_ref->{type} =~ / ^ (?: utf8 | high ) $ /xi)
+ {
+ my $lower_limit_is_80 = ($ranges[$i]->[0] == 0x80);
+ my $upper_limit_is_BF = ($ranges[$i]->[1] == 0xBF);
+
+ # If the range is the entire legal range, it matches any legal
+ # byte, so we can omit both tests. (This should happen only
+ # if the number of ranges is 1.)
+ if ($lower_limit_is_80 && $upper_limit_is_BF) {
+ return 1;
}
-
- # If we didn't change to omit a test above, see if the number
- # of elements is a power of 2 (only a single bit in the
- # representation of its count will be set) and if so, it may
- # be that a mask/compare optimization is possible.
- if ($output eq ""
- && pop_count($ranges[$i]->[1] - $ranges[$i]->[0] + 1) == 1)
- {
- my @list;
- push @list, $_ for ($ranges[$i]->[0] .. $ranges[$i]->[1]);
- my ($mask, $base) = calculate_mask(@list);
- if (defined $mask && defined $base) {
- $output = sprintf "( $test & $self->{val_fmt} ) == $self->{val_fmt}", $mask, $base;
- }
+ elsif ($lower_limit_is_80) { # Just use the upper limit test
+ $output = sprintf("( $test <= $self->{val_fmt} )",
+ $ranges[$i]->[1]);
}
-
- if ($output ne "") { # Prefer any optimization
- $ranges[$i] = $output;
+ elsif ($upper_limit_is_BF) { # Just use the lower limit test
+ $output = sprintf("( $test >= $self->{val_fmt} )",
+ $ranges[$i]->[0]);
}
- elsif ($ranges[$i]->[0] + 1 == $ranges[$i]->[1]) {
- # No optimization happened. We need a test that the code
- # point is within both bounds. But, if the bounds are
- # adjacent code points, it is cleaner to say
- # 'first == test || second == test'
- # than it is to say
- # 'first <= test && test <= second'
- $ranges[$i] = "( "
- . join( " || ", ( map
- { sprintf "$self->{val_fmt} == $test", $_ }
- @{$ranges[$i]} ) )
- . " )";
- }
- else { # Full bounds checking
- $ranges[$i] = sprintf("( $self->{val_fmt} <= $test && $test <= $self->{val_fmt} )", $ranges[$i]->[0], $ranges[$i]->[1]);
+ }
+
+ # If we didn't change to omit a test above, see if the number of
+ # elements is a power of 2 (only a single bit in the
+ # representation of its count will be set) and if so, it may be
+ # that a mask/compare optimization is possible.
+ if ($output eq ""
+ && pop_count($ranges[$i]->[1] - $ranges[$i]->[0] + 1) == 1)
+ {
+ my @list;
+ push @list, $_ for ($ranges[$i]->[0] .. $ranges[$i]->[1]);
+ my ($mask, $base) = calculate_mask(@list);
+ if (defined $mask && defined $base) {
+ $output = sprintf "( $test & $self->{val_fmt} ) == $self->{val_fmt}", $mask, $base;
}
}
+
+ if ($output ne "") { # Prefer any optimization
+ $ranges[$i] = $output;
+ }
+ elsif ($ranges[$i]->[0] + 1 == $ranges[$i]->[1]) {
+ # No optimization happened. We need a test that the code
+ # point is within both bounds. But, if the bounds are
+ # adjacent code points, it is cleaner to say
+ # 'first == test || second == test'
+ # than it is to say
+ # 'first <= test && test <= second'
+ $ranges[$i] = "( "
+ . join( " || ", ( map
+ { sprintf "$self->{val_fmt} == $test", $_ }
+ @{$ranges[$i]} ) )
+ . " )";
+ }
+ else { # Full bounds checking
+ $ranges[$i] = sprintf("( $self->{val_fmt} <= $test && $test <= $self->{val_fmt} )", $ranges[$i]->[0], $ranges[$i]->[1]);
+ }
}
+ }
return "( " . join( " || ", @ranges ) . " )";