From 1784d2f935ff888bd85e2072b1d2486e159c6caf Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 26 Dec 2013 14:01:49 -0700 Subject: [PATCH] White-space only This indents various newly-formed blocks (by the previous commit) in these three files, and reflows lines to fit into 79 columns --- lib/Unicode/UCD.pm | 354 +++++++++++++++++++++++++++-------------------------- lib/Unicode/UCD.t | 23 ++-- utf8.c | 56 ++++----- 3 files changed, 220 insertions(+), 213 deletions(-) diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 106fe7e..8674545 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -2234,41 +2234,41 @@ sub prop_invlist ($;$) { shift @invlist; } else { - # The input lines look like: - # 0041\t005A # [26] - # 005F + # The input lines look like: + # 0041\t005A # [26] + # 005F - # Split into lines, stripped of trailing comments - foreach my $range (split "\n", - $swash->{'LIST'} =~ s/ \s* (?: \# .* )? $ //xmgr) - { - # And find the beginning and end of the range on the line - my ($hex_begin, $hex_end) = split "\t", $range; - my $begin = hex $hex_begin; - - # If the new range merely extends the old, we remove the marker - # created the last time through the loop for the old's end, which - # causes the new one's end to be used instead. - if (@invlist && $begin == $invlist[-1]) { - pop @invlist; - } - else { - # Add the beginning of the range - push @invlist, $begin; - } + # Split into lines, stripped of trailing comments + foreach my $range (split "\n", + $swash->{'LIST'} =~ s/ \s* (?: \# .* )? $ //xmgr) + { + # And find the beginning and end of the range on the line + my ($hex_begin, $hex_end) = split "\t", $range; + my $begin = hex $hex_begin; + + # If the new range merely extends the old, we remove the marker + # created the last time through the loop for the old's end, which + # causes the new one's end to be used instead. 
+ if (@invlist && $begin == $invlist[-1]) { + pop @invlist; + } + else { + # Add the beginning of the range + push @invlist, $begin; + } - if (defined $hex_end) { # The next item starts with the code point 1 - # beyond the end of the range. - no warnings 'portable'; - my $end = hex $hex_end; - last if $end == $Unicode::UCD::MAX_CP; - push @invlist, $end + 1; - } - else { # No end of range, is a single code point. - push @invlist, $begin + 1; + if (defined $hex_end) { # The next item starts with the code point 1 + # beyond the end of the range. + no warnings 'portable'; + my $end = hex $hex_end; + last if $end == $Unicode::UCD::MAX_CP; + push @invlist, $end + 1; + } + else { # No end of range, is a single code point. + push @invlist, $begin + 1; + } } } - } # Could need to be inverted: add or subtract a 0 at the beginning of the # list. @@ -3198,163 +3198,169 @@ RETRY: } } else { - # The LIST input lines look like: - # ... - # 0374\t\tCommon - # 0375\t0377\tGreek # [3] - # 037A\t037D\tGreek # [4] - # 037E\t\tCommon - # 0384\t\tGreek - # ... - # - # Convert them to like - # 0374 => Common - # 0375 => Greek - # 0378 => $missing - # 037A => Greek - # 037E => Common - # 037F => $missing - # 0384 => Greek - # - # For binary properties, the final non-comment column is absent, and - # assumed to be 'Y'. - - foreach my $range (split "\n", $swash->{'LIST'}) { - $range =~ s/ \s* (?: \# .* )? $ //xg; # rmv trailing space, comments - - # Find the beginning and end of the range on the line - my ($hex_begin, $hex_end, $map) = split "\t", $range; - my $begin = hex $hex_begin; - no warnings 'portable'; - my $end = (defined $hex_end && $hex_end ne "") - ? 
hex $hex_end - : $begin; - - # Each time through the loop (after the first): - # $invlist[-2] contains the beginning of the previous range processed - # $invlist[-1] contains the end+1 of the previous range processed - # $invmap[-2] contains the value of the previous range processed - # $invmap[-1] contains the default value for missing ranges ($missing) + # The LIST input lines look like: + # ... + # 0374\t\tCommon + # 0375\t0377\tGreek # [3] + # 037A\t037D\tGreek # [4] + # 037E\t\tCommon + # 0384\t\tGreek + # ... # - # Thus, things are set up for the typical case of a new non-adjacent - # range of non-missings to be added. But, if the new range is - # adjacent, it needs to replace the [-1] element; and if the new - # range is a multiple value of the previous one, it needs to be added - # to the [-2] map element. - - # The first time through, everything will be empty. If the property - # doesn't have a range that begins at 0, add one that maps to $missing - if (! @invlist) { - if ($begin != 0) { - push @invlist, 0; - push @invmap, $missing; - } - } - elsif (@invlist > 1 && $invlist[-2] == $begin) { - - # Here we handle the case where the input has multiple entries for - # each code point. mktables should have made sure that each such - # range contains only one code point. At this point, $invlist[-1] - # is the $missing that was added at the end of the last loop - # iteration, and [-2] is the last real input code point, and that - # code point is the same as the one we are adding now, making the - # new one a multiple entry. Add it to the existing entry, either - # by pushing it to the existing list of multiple entries, or - # converting the single current entry into a list with both on it. - # This is all we need do for this iteration. - - if ($end != $begin) { - croak __PACKAGE__, ":prop_invmap: Multiple maps per code point in '$prop' require single-element ranges: begin=$begin, end=$end, map=$map"; - } - if (! 
ref $invmap[-2]) { - $invmap[-2] = [ $invmap[-2], $map ]; - } - else { - push @{$invmap[-2]}, $map; - } - $has_multiples = 1; - next; - } - elsif ($invlist[-1] == $begin) { - - # If the input isn't in the most compact form, so that there are - # two adjacent ranges that map to the same thing, they should be - # combined (EXCEPT where the arrays require adjustments, in which - # case everything is already set up correctly). This happens in - # our constructed dt mapping, as Element [-2] is the map for the - # latest range so far processed. Just set the beginning point of - # the map to $missing (in invlist[-1]) to 1 beyond where this - # range ends. For example, in - # 12\t13\tXYZ - # 14\t17\tXYZ - # we have set it up so that it looks like - # 12 => XYZ - # 14 => $missing + # Convert them to like + # 0374 => Common + # 0375 => Greek + # 0378 => $missing + # 037A => Greek + # 037E => Common + # 037F => $missing + # 0384 => Greek + # + # For binary properties, the final non-comment column is absent, and + # assumed to be 'Y'. + + foreach my $range (split "\n", $swash->{'LIST'}) { + $range =~ s/ \s* (?: \# .* )? $ //xg; # rmv trailing space, comments + + # Find the beginning and end of the range on the line + my ($hex_begin, $hex_end, $map) = split "\t", $range; + my $begin = hex $hex_begin; + no warnings 'portable'; + my $end = (defined $hex_end && $hex_end ne "") + ? hex $hex_end + : $begin; + + # Each time through the loop (after the first): + # $invlist[-2] contains the beginning of the previous range processed + # $invlist[-1] contains the end+1 of the previous range processed + # $invmap[-2] contains the value of the previous range processed + # $invmap[-1] contains the default value for missing ranges + # ($missing) # - # We now see that it should be - # 12 => XYZ - # 18 => $missing - if (! $requires_adjustment && @invlist > 1 && ( (defined $map) - ? 
$invmap[-2] eq $map - : $invmap[-2] eq 'Y')) - { - $invlist[-1] = $end + 1; + # Thus, things are set up for the typical case of a new + # non-adjacent range of non-missings to be added. But, if the new + # range is adjacent, it needs to replace the [-1] element; and if + # the new range is a multiple value of the previous one, it needs + # to be added to the [-2] map element. + + # The first time through, everything will be empty. If the + # property doesn't have a range that begins at 0, add one that + # maps to $missing + if (! @invlist) { + if ($begin != 0) { + push @invlist, 0; + push @invmap, $missing; + } + } + elsif (@invlist > 1 && $invlist[-2] == $begin) { + + # Here we handle the case where the input has multiple entries + # for each code point. mktables should have made sure that + # each such range contains only one code point. At this + # point, $invlist[-1] is the $missing that was added at the + # end of the last loop iteration, and [-2] is the last real + # input code point, and that code point is the same as the one + # we are adding now, making the new one a multiple entry. Add + # it to the existing entry, either by pushing it to the + # existing list of multiple entries, or converting the single + # current entry into a list with both on it. This is all we + # need do for this iteration. + + if ($end != $begin) { + croak __PACKAGE__, ":prop_invmap: Multiple maps per code point in '$prop' require single-element ranges: begin=$begin, end=$end, map=$map"; + } + if (! ref $invmap[-2]) { + $invmap[-2] = [ $invmap[-2], $map ]; + } + else { + push @{$invmap[-2]}, $map; + } + $has_multiples = 1; next; } + elsif ($invlist[-1] == $begin) { + + # If the input isn't in the most compact form, so that there + # are two adjacent ranges that map to the same thing, they + # should be combined (EXCEPT where the arrays require + # adjustments, in which case everything is already set up + # correctly). 
This happens in our constructed dt mapping, as + # Element [-2] is the map for the latest range so far + # processed. Just set the beginning point of the map to + # $missing (in invlist[-1]) to 1 beyond where this range ends. + # For example, in + # 12\t13\tXYZ + # 14\t17\tXYZ + # we have set it up so that it looks like + # 12 => XYZ + # 14 => $missing + # + # We now see that it should be + # 12 => XYZ + # 18 => $missing + if (! $requires_adjustment && @invlist > 1 && ( (defined $map) + ? $invmap[-2] eq $map + : $invmap[-2] eq 'Y')) + { + $invlist[-1] = $end + 1; + next; + } - # Here, the range started in the previous iteration that maps to - # $missing starts at the same code point as this range. That - # means there is no gap to fill that that range was intended for, - # so we just pop it off the parallel arrays. - pop @invlist; - pop @invmap; - } - - # Add the range beginning, and the range's map. - push @invlist, $begin; - if ($returned_prop eq 'ToDm') { - - # The decomposition maps are either a line like <hangul syllable> - # which are to be taken as is; or a sequence of code points in hex - # and separated by blanks. Convert them to decimal, and if there - # is more than one, use an anonymous array as the map. - if ($map =~ /^ < /x) { - push @invmap, $map; + # Here, the range started in the previous iteration that maps + # to $missing starts at the same code point as this range. + # That means there is no gap to fill that that range was + # intended for, so we just pop it off the parallel arrays. + pop @invlist; + pop @invmap; } - else { - my @map = split " ", $map; - if (@map == 1) { - push @invmap, $map[0]; + + # Add the range beginning, and the range's map. + push @invlist, $begin; + if ($returned_prop eq 'ToDm') { + + # The decomposition maps are either a line like <hangul + # syllable> which are to be taken as is; or a sequence of code + # points in hex and separated by blanks. Convert them to + # decimal, and if there is more than one, use an anonymous + # array as the map.
+ if ($map =~ /^ < /x) { + push @invmap, $map; } else { - push @invmap, \@map; + my @map = split " ", $map; + if (@map == 1) { + push @invmap, $map[0]; + } + else { + push @invmap, \@map; + } } } - } - else { + else { - # Otherwise, convert hex formatted list entries to decimal; add a - # 'Y' map for the missing value in binary properties, or - # otherwise, use the input map unchanged. - $map = ($format eq 'x' || $format eq 'ax') - ? hex $map - : $format eq 'b' - ? 'Y' - : $map; - push @invmap, $map; - } + # Otherwise, convert hex formatted list entries to decimal; + # add a 'Y' map for the missing value in binary properties, or + # otherwise, use the input map unchanged. + $map = ($format eq 'x' || $format eq 'ax') + ? hex $map + : $format eq 'b' + ? 'Y' + : $map; + push @invmap, $map; + } - # We just started a range. It ends with $end. The gap between it and - # the next element in the list must be filled with a range that maps - # to the default value. If there is no gap, the next iteration will - # pop this, unless there is no next iteration, and we have filled all - # of the Unicode code space, so check for that and skip. - if ($end < $Unicode::UCD::MAX_CP) { - push @invlist, $end + 1; - push @invmap, $missing; + # We just started a range. It ends with $end. The gap between it + # and the next element in the list must be filled with a range + # that maps to the default value. If there is no gap, the next + # iteration will pop this, unless there is no next iteration, and + # we have filled all of the Unicode code space, so check for that + # and skip. + if ($end < $Unicode::UCD::MAX_CP) { + push @invlist, $end + 1; + push @invmap, $missing; + } } } - } # If the property is empty, make all code points use the value for missing # ones. 
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 0d709b1..1c7b45c 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -1878,17 +1878,18 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { } } else { - $end = ($start == $end) ? "" : sprintf($file_range_format, $end); - if ($invmap_ref->[$i] ne "") { - $tested_map .= sprintf "$file_range_format\t%s\t%s\n", - $start, $end, $invmap_ref->[$i]; - } - elsif ($end ne "") { - $tested_map .= sprintf "$file_range_format\t%s\n", $start, $end; - } - else { - $tested_map .= sprintf "$file_range_format\n", $start; - } + $end = ($start == $end) ? "" : sprintf($file_range_format, $end); + if ($invmap_ref->[$i] ne "") { + $tested_map .= sprintf "$file_range_format\t%s\t%s\n", + $start, $end, $invmap_ref->[$i]; + } + elsif ($end ne "") { + $tested_map .= sprintf "$file_range_format\t%s\n", + $start, $end; + } + else { + $tested_map .= sprintf "$file_range_format\n", $start; + } } } # End of looping over all elements. diff --git a/utf8.c b/utf8.c index 818efb1..45ea233 100644 --- a/utf8.c +++ b/utf8.c @@ -4167,39 +4167,39 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) } else { - /* Scan the input to count the number of lines to preallocate array size - * based on worst possible case, which is each line in the input creates 2 - * elements in the inversion list: 1) the beginning of a range in the list; - * 2) the beginning of a range not in the list. */ - while ((loc = (strchr(loc, '\n'))) != NULL) { - elements += 2; - loc++; - } - - /* If the ending is somehow corrupt and isn't a new line, add another - * element for the final range that isn't in the inversion list */ - if (! 
(*lend == '\n' - || (*lend == '\0' && (lcur == 0 || *(lend - 1) == '\n')))) - { - elements++; - } + /* Scan the input to count the number of lines to preallocate array + * size based on worst possible case, which is each line in the input + * creates 2 elements in the inversion list: 1) the beginning of a + * range in the list; 2) the beginning of a range not in the list. */ + while ((loc = (strchr(loc, '\n'))) != NULL) { + elements += 2; + loc++; + } - invlist = _new_invlist(elements); + /* If the ending is somehow corrupt and isn't a new line, add another + * element for the final range that isn't in the inversion list */ + if (! (*lend == '\n' + || (*lend == '\0' && (lcur == 0 || *(lend - 1) == '\n')))) + { + elements++; + } - /* Now go through the input again, adding each range to the list */ - while (l < lend) { - UV start, end; - UV val; /* Not used by this function */ + invlist = _new_invlist(elements); - l = S_swash_scan_list_line(aTHX_ l, lend, &start, &end, &val, - cBOOL(octets), typestr); + /* Now go through the input again, adding each range to the list */ + while (l < lend) { + UV start, end; + UV val; /* Not used by this function */ - if (l > lend) { - break; - } + l = S_swash_scan_list_line(aTHX_ l, lend, &start, &end, &val, + cBOOL(octets), typestr); - invlist = _add_range_to_invlist(invlist, start, end); - } + if (l > lend) { + break; + } + + invlist = _add_range_to_invlist(invlist, start, end); + } } /* Invert if the data says it should be */ -- 2.7.4