From 2358c533570dc87f10a95c0f732bcc2e93f75904 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 20 Oct 2012 11:27:44 -0600 Subject: [PATCH] regen/regcharclass.pl: White-space only; no code changes This outdents some code that is no longer contained in a block, and reflows the comments to 80 columns. --- regen/regcharclass.pl | 176 +++++++++++++++++++++++++------------------------- 1 file changed, 88 insertions(+), 88 deletions(-) diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl index b95d740..cb971a0 100755 --- a/regen/regcharclass.pl +++ b/regen/regcharclass.pl @@ -758,102 +758,102 @@ sub _cond_as_str { return "( " . join( " || ", @ranges ) . " )"; } - # If the input set has certain characteristics, we can optimize tests - # for it. This doesn't apply if returning the code point, as we want - # each element of the set individually. The code above is for this - # simpler case. - - return 1 if @$cond == 256; # If all bytes match, is trivially true - - if (@ranges > 1) { - # See if the entire set shares optimizable characterstics, and if - # so, return the optimization. We delay checking for this on sets - # with just a single range, as there may be better optimizations - # available in that case. - my ($mask, $base) = calculate_mask(@$cond); - if (defined $mask && defined $base) { - return sprintf "( ( $test & $self->{val_fmt} ) == $self->{val_fmt} )", $mask, $base; - } + # If the input set has certain characteristics, we can optimize tests + # for it. This doesn't apply if returning the code point, as we want + # each element of the set individually. The code above is for this + # simpler case. + + return 1 if @$cond == 256; # If all bytes match, is trivially true + + if (@ranges > 1) { + # See if the entire set shares optimizable characterstics, and if so, + # return the optimization. We delay checking for this on sets with + # just a single range, as there may be better optimizations available + # in that case. 
+ my ($mask, $base) = calculate_mask(@$cond); + if (defined $mask && defined $base) { + return sprintf "( ( $test & $self->{val_fmt} ) == $self->{val_fmt} )", $mask, $base; } + } - # Here, there was no entire-class optimization. Look at each range. - for (my $i = 0; $i < @ranges; $i++) { - if (! ref $ranges[$i]) { # Trivial case: no range - $ranges[$i] = sprintf "$self->{val_fmt} == $test", $ranges[$i]; - } - elsif ($ranges[$i]->[0] == $ranges[$i]->[1]) { - $ranges[$i] = # Trivial case: single element range - sprintf "$self->{val_fmt} == $test", $ranges[$i]->[0]; - } - else { - my $output = ""; - - # Well-formed UTF-8 continuation bytes on ascii platforms must - # be in the range 0x80 .. 0xBF. If we know that the input is - # well-formed (indicated by not trying to be 'safe'), we can - # omit tests that verify that the input is within either of - # these bounds. (No legal UTF-8 character can begin with - # anything in this range, so we don't have to worry about this - # being a continuation byte or not.) - if (ASCII_PLATFORM - && ! $opts_ref->{safe} - && $opts_ref->{type} =~ / ^ (?: utf8 | high ) $ /xi) - { - my $lower_limit_is_80 = ($ranges[$i]->[0] == 0x80); - my $upper_limit_is_BF = ($ranges[$i]->[1] == 0xBF); - - # If the range is the entire legal range, it matches any - # legal byte, so we can omit both tests. (This should - # happen only if the number of ranges is 1.) - if ($lower_limit_is_80 && $upper_limit_is_BF) { - return 1; - } - elsif ($lower_limit_is_80) { # Just use the upper limit test - $output = sprintf("( $test <= $self->{val_fmt} )", - $ranges[$i]->[1]); - } - elsif ($upper_limit_is_BF) { # Just use the lower limit test - $output = sprintf("( $test >= $self->{val_fmt} )", - $ranges[$i]->[0]); - } + # Here, there was no entire-class optimization. Look at each range. + for (my $i = 0; $i < @ranges; $i++) { + if (! 
ref $ranges[$i]) { # Trivial case: no range + $ranges[$i] = sprintf "$self->{val_fmt} == $test", $ranges[$i]; + } + elsif ($ranges[$i]->[0] == $ranges[$i]->[1]) { + $ranges[$i] = # Trivial case: single element range + sprintf "$self->{val_fmt} == $test", $ranges[$i]->[0]; + } + else { + my $output = ""; + + # Well-formed UTF-8 continuation bytes on ascii platforms must be + # in the range 0x80 .. 0xBF. If we know that the input is + # well-formed (indicated by not trying to be 'safe'), we can omit + # tests that verify that the input is within either of these + # bounds. (No legal UTF-8 character can begin with anything in + # this range, so we don't have to worry about this being a + # continuation byte or not.) + if (ASCII_PLATFORM + && ! $opts_ref->{safe} + && $opts_ref->{type} =~ / ^ (?: utf8 | high ) $ /xi) + { + my $lower_limit_is_80 = ($ranges[$i]->[0] == 0x80); + my $upper_limit_is_BF = ($ranges[$i]->[1] == 0xBF); + + # If the range is the entire legal range, it matches any legal + # byte, so we can omit both tests. (This should happen only + # if the number of ranges is 1.) + if ($lower_limit_is_80 && $upper_limit_is_BF) { + return 1; } - - # If we didn't change to omit a test above, see if the number - # of elements is a power of 2 (only a single bit in the - # representation of its count will be set) and if so, it may - # be that a mask/compare optimization is possible. - if ($output eq "" - && pop_count($ranges[$i]->[1] - $ranges[$i]->[0] + 1) == 1) - { - my @list; - push @list, $_ for ($ranges[$i]->[0] .. 
$ranges[$i]->[1]); - my ($mask, $base) = calculate_mask(@list); - if (defined $mask && defined $base) { - $output = sprintf "( $test & $self->{val_fmt} ) == $self->{val_fmt}", $mask, $base; - } + elsif ($lower_limit_is_80) { # Just use the upper limit test + $output = sprintf("( $test <= $self->{val_fmt} )", + $ranges[$i]->[1]); } - - if ($output ne "") { # Prefer any optimization - $ranges[$i] = $output; + elsif ($upper_limit_is_BF) { # Just use the lower limit test + $output = sprintf("( $test >= $self->{val_fmt} )", + $ranges[$i]->[0]); } - elsif ($ranges[$i]->[0] + 1 == $ranges[$i]->[1]) { - # No optimization happened. We need a test that the code - # point is within both bounds. But, if the bounds are - # adjacent code points, it is cleaner to say - # 'first == test || second == test' - # than it is to say - # 'first <= test && test <= second' - $ranges[$i] = "( " - . join( " || ", ( map - { sprintf "$self->{val_fmt} == $test", $_ } - @{$ranges[$i]} ) ) - . " )"; - } - else { # Full bounds checking - $ranges[$i] = sprintf("( $self->{val_fmt} <= $test && $test <= $self->{val_fmt} )", $ranges[$i]->[0], $ranges[$i]->[1]); + } + + # If we didn't change to omit a test above, see if the number of + # elements is a power of 2 (only a single bit in the + # representation of its count will be set) and if so, it may be + # that a mask/compare optimization is possible. + if ($output eq "" + && pop_count($ranges[$i]->[1] - $ranges[$i]->[0] + 1) == 1) + { + my @list; + push @list, $_ for ($ranges[$i]->[0] .. $ranges[$i]->[1]); + my ($mask, $base) = calculate_mask(@list); + if (defined $mask && defined $base) { + $output = sprintf "( $test & $self->{val_fmt} ) == $self->{val_fmt}", $mask, $base; } } + + if ($output ne "") { # Prefer any optimization + $ranges[$i] = $output; + } + elsif ($ranges[$i]->[0] + 1 == $ranges[$i]->[1]) { + # No optimization happened. We need a test that the code + # point is within both bounds. 
But, if the bounds are + # adjacent code points, it is cleaner to say + # 'first == test || second == test' + # than it is to say + # 'first <= test && test <= second' + $ranges[$i] = "( " + . join( " || ", ( map + { sprintf "$self->{val_fmt} == $test", $_ } + @{$ranges[$i]} ) ) + . " )"; + } + else { # Full bounds checking + $ranges[$i] = sprintf("( $self->{val_fmt} <= $test && $test <= $self->{val_fmt} )", $ranges[$i]->[0], $ranges[$i]->[1]); + } } + } return "( " . join( " || ", @ranges ) . " )"; -- 2.7.4