From 39a0f513a3472b239cae9745b3b89abe93b6ca79 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Thu, 6 Dec 2012 11:11:47 +0100 Subject: [PATCH] make regcharclass generate submacros if necessary to keep them short Some compilers can't handle unexpanded macros longer than something like 8000 characters. So we split up long ones into sub macros to work around the problem --- regcharclass.h | 134 ++++++++++++++++++++++++++------------------------ regen/regcharclass.pl | 34 ++++++++----- utf8.h | 17 +++---- 3 files changed, 102 insertions(+), 83 deletions(-) diff --git a/regcharclass.h b/regcharclass.h index 64e4453..1d335e6 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -624,9 +624,8 @@ &regcharclass_multi_char_folds::multi_char_folds(1) */ /*** GENERATED CODE ***/ -#define is_MULTI_CHAR_FOLD_utf8_safe(s,e) \ -( ((e)-(s) > 5) ? \ - ( ( 0x61 == ((U8*)s)[0] ) ? \ +#define is_MULTI_CHAR_FOLD_utf8_safe_part0(s,e) \ +( ( 0x61 == ((U8*)s)[0] ) ? \ ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \ : ( 0x66 == ((U8*)s)[0] ) ? \ ( ( 0x66 == ((U8*)s)[1] ) ? \ @@ -651,39 +650,19 @@ ( ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0xB1 == ((U8*)s)[1] || 0xB7 == ((U8*)s)[1] ) ? \ ( ( 0xCD == ((U8*)s)[2] ) ? \ - ( ( 0x82 == ((U8*)s)[3] ) ? \ - ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\ - : 0 ) \ + ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \ : ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ - : ( 0xB9 == ((U8*)s)[1] ) ? \ - ( ( 0xCC == ((U8*)s)[2] ) ? \ - ( ( 0x88 == ((U8*)s)[3] ) ? \ - ( ( 0xCC == ((U8*)s)[4] ) ? \ - ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \ - : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\ - : 0 ) \ - : ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ - : 0 ) \ + : ( ( ( 0xB9 == ((U8*)s)[1] ) && ( 0xCD == ((U8*)s)[2] ) ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0xCF == ((U8*)s)[0] ) ? \ ( ( 0x81 == ((U8*)s)[1] ) ? 
\ ( ( ( 0xCC == ((U8*)s)[2] ) && ( 0x93 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0x85 == ((U8*)s)[1] ) ? \ ( ( 0xCC == ((U8*)s)[2] ) ? \ - ( ( 0x88 == ((U8*)s)[3] ) ? \ - ( ( 0xCC == ((U8*)s)[4] ) ? \ - ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \ - : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\ - : ( 0x93 == ((U8*)s)[3] ) ? \ - ( ( 0xCC == ((U8*)s)[4] ) ? \ - ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 4 ) \ - : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 4 )\ - : 0 ) \ + ( ( 0x93 == ((U8*)s)[3] ) ? 4 : 0 ) \ : ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0x89 == ((U8*)s)[1] ) ? \ ( ( 0xCD == ((U8*)s)[2] ) ? \ - ( ( 0x82 == ((U8*)s)[3] ) ? \ - ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\ - : 0 ) \ + ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \ : ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( ( ( 0x8E == ((U8*)s)[1] ) && ( 0xCE == ((U8*)s)[2] ) ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0xD5 == ((U8*)s)[0] ) ? \ @@ -696,8 +675,12 @@ ( ( 0xBC == ((U8*)s)[1] ) ? \ ( ( ( ( ( ((U8*)s)[2] & 0xD8 ) == 0x80 ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ : ( ( ( ( 0xBD == ((U8*)s)[1] ) && ( ( ( ((U8*)s)[2] & 0xF8 ) == 0xA0 ) || ( ( ((U8*)s)[2] & 0xFB ) == 0xB0 ) || ((U8*)s)[2] == 0xBC ) ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ - : 0 ) \ -: ((e)-(s) > 4) ? \ + : 0 ) + + +/*** GENERATED CODE ***/ +#define is_MULTI_CHAR_FOLD_utf8_safe_part1(s,e) \ +( ((e)-(s) > 3) ? \ ( ( 0x61 == ((U8*)s)[0] ) ? \ ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \ : ( 0x66 == ((U8*)s)[0] ) ? \ @@ -744,12 +727,37 @@ : ( 0xB4 == ((U8*)s)[1] ) ? \ ( ( ( 0xD5 == ((U8*)s)[2] ) && ( ( ( ((U8*)s)[3] & 0xF7 ) == 0xA5 ) || ((U8*)s)[3] == 0xAB || ((U8*)s)[3] == 0xB6 ) ) ? 4 : 0 )\ : ( ( ( 0xBE == ((U8*)s)[1] ) && ( 0xD5 == ((U8*)s)[2] ) ) && ( 0xB6 == ((U8*)s)[3] ) ) ? 4 : 0 )\ - : ( 0xE1 == ((U8*)s)[0] ) ? 
\ - ( ( 0xBC == ((U8*)s)[1] ) ? \ - ( ( ( ( ( ((U8*)s)[2] & 0xD8 ) == 0x80 ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ - : ( ( ( ( 0xBD == ((U8*)s)[1] ) && ( ( ( ((U8*)s)[2] & 0xF8 ) == 0xA0 ) || ( ( ((U8*)s)[2] & 0xFB ) == 0xB0 ) || ((U8*)s)[2] == 0xBC ) ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ : 0 ) \ -: ((e)-(s) > 3) ? \ +: ((e)-(s) > 2) ? \ + ( ( 0x61 == ((U8*)s)[0] ) ? \ + ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( 0x66 == ((U8*)s)[0] ) ? \ + ( ( 0x66 == ((U8*)s)[1] ) ? \ + ( ( 0x69 == ((U8*)s)[2] || 0x6C == ((U8*)s)[2] ) ? 3 : 2 ) \ + : ( 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 ) \ + : ( 0x68 == ((U8*)s)[0] ) ? \ + ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0xB1 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( 0x69 == ((U8*)s)[0] ) ? \ + ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x87 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( 0x6A == ((U8*)s)[0] ) ? \ + ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8C == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( 0x73 == ((U8*)s)[0] ) ? \ + ( ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ? 2 : 0 ) \ + : ( 0x74 == ((U8*)s)[0] ) ? \ + ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x88 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( 0x77 == ((U8*)s)[0] || 0x79 == ((U8*)s)[0] ) ? \ + ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8A == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( ( ( 0xCA == ((U8*)s)[0] ) && ( 0xBC == ((U8*)s)[1] ) ) && ( 0x6E == ((U8*)s)[2] ) ) ? 3 : 0 )\ +: ((e)-(s) > 1) ? \ + ( ( 0x66 == ((U8*)s)[0] ) ? \ + ( ( 0x66 == ((U8*)s)[1] || 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 )\ + : ( ( 0x73 == ((U8*)s)[0] ) && ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ) ? 2 : 0 )\ +: 0 ) + + +/*** GENERATED CODE ***/ +#define is_MULTI_CHAR_FOLD_utf8_safe(s,e) \ +( ((e)-(s) > 5) ? \ ( ( 0x61 == ((U8*)s)[0] ) ? \ ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \ : ( 0x66 == ((U8*)s)[0] ) ? \ @@ -775,19 +783,39 @@ ( ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 
4 : 0 )\ : ( 0xB1 == ((U8*)s)[1] || 0xB7 == ((U8*)s)[1] ) ? \ ( ( 0xCD == ((U8*)s)[2] ) ? \ - ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \ + ( ( 0x82 == ((U8*)s)[3] ) ? \ + ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\ + : 0 ) \ : ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ - : ( ( ( 0xB9 == ((U8*)s)[1] ) && ( 0xCD == ((U8*)s)[2] ) ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ + : ( 0xB9 == ((U8*)s)[1] ) ? \ + ( ( 0xCC == ((U8*)s)[2] ) ? \ + ( ( 0x88 == ((U8*)s)[3] ) ? \ + ( ( 0xCC == ((U8*)s)[4] ) ? \ + ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \ + : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\ + : 0 ) \ + : ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ + : 0 ) \ : ( 0xCF == ((U8*)s)[0] ) ? \ ( ( 0x81 == ((U8*)s)[1] ) ? \ ( ( ( 0xCC == ((U8*)s)[2] ) && ( 0x93 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0x85 == ((U8*)s)[1] ) ? \ ( ( 0xCC == ((U8*)s)[2] ) ? \ - ( ( 0x93 == ((U8*)s)[3] ) ? 4 : 0 ) \ + ( ( 0x88 == ((U8*)s)[3] ) ? \ + ( ( 0xCC == ((U8*)s)[4] ) ? \ + ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \ + : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\ + : ( 0x93 == ((U8*)s)[3] ) ? \ + ( ( 0xCC == ((U8*)s)[4] ) ? \ + ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 4 ) \ + : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 4 )\ + : 0 ) \ : ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0x89 == ((U8*)s)[1] ) ? \ ( ( 0xCD == ((U8*)s)[2] ) ? \ - ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \ + ( ( 0x82 == ((U8*)s)[3] ) ? \ + ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\ + : 0 ) \ : ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( ( ( 0x8E == ((U8*)s)[1] ) && ( 0xCE == ((U8*)s)[2] ) ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0xD5 == ((U8*)s)[0] ) ? \ @@ -796,32 +824,12 @@ : ( 0xB4 == ((U8*)s)[1] ) ? 
\ ( ( ( 0xD5 == ((U8*)s)[2] ) && ( ( ( ((U8*)s)[3] & 0xF7 ) == 0xA5 ) || ((U8*)s)[3] == 0xAB || ((U8*)s)[3] == 0xB6 ) ) ? 4 : 0 )\ : ( ( ( 0xBE == ((U8*)s)[1] ) && ( 0xD5 == ((U8*)s)[2] ) ) && ( 0xB6 == ((U8*)s)[3] ) ) ? 4 : 0 )\ + : ( 0xE1 == ((U8*)s)[0] ) ? \ + ( ( 0xBC == ((U8*)s)[1] ) ? \ + ( ( ( ( ( ((U8*)s)[2] & 0xD8 ) == 0x80 ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ + : ( ( ( ( 0xBD == ((U8*)s)[1] ) && ( ( ( ((U8*)s)[2] & 0xF8 ) == 0xA0 ) || ( ( ((U8*)s)[2] & 0xFB ) == 0xB0 ) || ((U8*)s)[2] == 0xBC ) ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ : 0 ) \ -: ((e)-(s) > 2) ? \ - ( ( 0x61 == ((U8*)s)[0] ) ? \ - ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( 0x66 == ((U8*)s)[0] ) ? \ - ( ( 0x66 == ((U8*)s)[1] ) ? \ - ( ( 0x69 == ((U8*)s)[2] || 0x6C == ((U8*)s)[2] ) ? 3 : 2 ) \ - : ( 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 ) \ - : ( 0x68 == ((U8*)s)[0] ) ? \ - ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0xB1 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( 0x69 == ((U8*)s)[0] ) ? \ - ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x87 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( 0x6A == ((U8*)s)[0] ) ? \ - ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8C == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( 0x73 == ((U8*)s)[0] ) ? \ - ( ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ? 2 : 0 ) \ - : ( 0x74 == ((U8*)s)[0] ) ? \ - ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x88 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( 0x77 == ((U8*)s)[0] || 0x79 == ((U8*)s)[0] ) ? \ - ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8A == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( ( ( 0xCA == ((U8*)s)[0] ) && ( 0xBC == ((U8*)s)[1] ) ) && ( 0x6E == ((U8*)s)[2] ) ) ? 3 : 0 )\ -: ((e)-(s) > 1) ? \ - ( ( 0x66 == ((U8*)s)[0] ) ? \ - ( ( 0x66 == ((U8*)s)[1] || 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 )\ - : ( ( 0x73 == ((U8*)s)[0] ) && ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ) ? 2 : 0 )\ -: 0 ) +: ((e)-(s) > 4) ? 
is_MULTI_CHAR_FOLD_utf8_safe_part0(s,e) : is_MULTI_CHAR_FOLD_utf8_safe_part1(s,e) ) /* MULTI_CHAR_FOLD: multi-char strings that are folded to by a single character diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl index 0bab570..5d37e85 100755 --- a/regen/regcharclass.pl +++ b/regen/regcharclass.pl @@ -237,6 +237,10 @@ sub __clean { : \s* \5 \s* ([()]) /$1 ( $2 && $3 ) ? $4 : $5 $6/gx; + #$expr=~s/\(\(U8\*\)s\)\[(\d+)\]/S$1/g if length $expr > 8000; + #$expr=~s/\s+//g if length $expr > 8000; + + die "Expression too long" if length $expr > 8000; return $expr; } @@ -1093,7 +1097,7 @@ sub _combine { # _render() # recursively convert an optree to text with reasonably neat formatting sub _render { - my ( $self, $op, $combine, $brace, $opts_ref )= @_; + my ( $self, $op, $combine, $brace, $opts_ref, $def, $submacros )= @_; return 0 if ! defined $op; # The set is empty if ( !ref $op ) { return $op; @@ -1101,10 +1105,10 @@ sub _render { my $cond= $self->_cond_as_str( $op, $combine, $opts_ref ); #no warnings 'recursion'; # This would allow really really inefficient # code to be generated. See pod - my $yes= $self->_render( $op->{yes}, $combine, 1, $opts_ref ); + my $yes= $self->_render( $op->{yes}, $combine, 1, $opts_ref, $def, $submacros ); return $yes if $cond eq '1'; - my $no= $self->_render( $op->{no}, $combine, 0, $opts_ref ); + my $no= $self->_render( $op->{no}, $combine, 0, $opts_ref, $def, $submacros ); return "( $cond )" if $yes eq '1' and $no eq '0'; my ( $lb, $rb )= $brace ? ( "( ", " )" ) : ( "", "" ); return "$lb$cond ? $yes : $no$rb" @@ -1118,7 +1122,13 @@ sub _render { $yes= " " . $yes; } - return "$lb$cond ?$yes$ind: $no$rb"; + my $str= "$lb$cond ?$yes$ind: $no$rb"; + if (length $str > 6000) { + push @$submacros, sprintf "#define $def\n( %s )", "_part" . (my $yes_idx= 0+@$submacros), $yes; + push @$submacros, sprintf "#define $def\n( %s )", "_part" . (my $no_idx= 0+@$submacros), $no; + return sprintf "%s%s ? 
$def : $def%s", $lb, $cond, "_part$yes_idx", "_part$no_idx", $rb; + } + return $str; } # $expr=render($op,$combine) @@ -1129,9 +1139,12 @@ sub _render { # longer lists such as that resulting from type 'cp' output. # Currently only used for type 'cp' macros. sub render { - my ( $self, $op, $combine, $opts_ref )= @_; - my $str= "( " . $self->_render( $op, $combine, 0, $opts_ref ) . " )"; - return __clean( $str ); + my ( $self, $op, $combine, $opts_ref, $def_fmt )= @_; + + my @submacros; + my $macro= sprintf "#define $def_fmt\n( %s )", "", $self->_render( $op, $combine, 0, $opts_ref, $def_fmt, \@submacros ); + + return join "\n\n", map { "/*** GENERATED CODE ***/\n" . __macro( __clean( $_ ) ) } @submacros, $macro; } # make_macro @@ -1168,8 +1181,6 @@ sub make_macro { } else { $method= 'optree'; } - my $optree= $self->$method( %opts, type => $type, ret_type => $ret_type ); - my $text= $self->render( $optree, ($type =~ /^cp/) ? 1 : 0, \%opts ); my @args= $type =~ /^cp/ ? 'cp' : 's'; push @args, "e" if $opts{safe}; push @args, "is_utf8" if $type eq 'generic'; @@ -1179,8 +1190,9 @@ sub make_macro { my $ext= $type eq 'generic' ? '' : '_' . lc( $type ); $ext .= "_safe" if $opts{safe}; my $argstr= join ",", @args; - return "/*** GENERATED CODE ***/\n" - . __macro( "#define $pfx$self->{op}$ext($argstr)\n$text" ); + my $def_fmt="$pfx$self->{op}$ext%s($argstr)"; + my $optree= $self->$method( %opts, type => $type, ret_type => $ret_type ); + return $self->render( $optree, ($type =~ /^cp/) ? 1 : 0, \%opts, $def_fmt ); } # if we arent being used as a module (highly likely) then process diff --git a/utf8.h b/utf8.h index 3fbc28a..1df972b 100644 --- a/utf8.h +++ b/utf8.h @@ -476,6 +476,11 @@ Perl's extended UTF-8 means we can have start bytes up to FF. * takes on the order of 10 minutes to generate, and is never going to change. 
* The EBCDIC equivalent hasn't been commented out in regcharclass.pl, so it * should generate and run the correct stuff */ +/* + UTF8_CHAR: Matches utf8 from 1 to 4 bytes + + 0x0 - 0x1FFFFF +*/ /*** GENERATED CODE ***/ #define is_UTF8_CHAR_utf8_safe(s,e) \ ( ((e)-(s) > 3) ? \ @@ -488,23 +493,17 @@ Perl's extended UTF-8 means we can have start bytes up to FF. ( ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ : ( 0xF0 == ((U8*)s)[0] ) ? \ ( ( ( ( 0x90 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\ - : ( 0xF1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xF7 ) ? \ - ( ( ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\ - : 0 ) \ + : ( ( ( ( 0xF1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xF7 ) && ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\ : ((e)-(s) > 2) ? \ ( ( ( ((U8*)s)[0] & 0x80 ) == 0x00 ) ? 1 \ : ( 0xC2 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xDF ) ? \ ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ? 2 : 0 ) \ : ( 0xE0 == ((U8*)s)[0] ) ? \ ( ( ( ( ((U8*)s)[1] & 0xE0 ) == 0xA0 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ - : ( 0xE1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xEF ) ? \ - ( ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ - : 0 ) \ + : ( ( ( 0xE1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xEF ) && ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ : ((e)-(s) > 1) ? \ ( ( ( ((U8*)s)[0] & 0x80 ) == 0x00 ) ? 1 \ - : ( 0xC2 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xDF ) ? \ - ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ? 2 : 0 ) \ - : 0 ) \ + : ( ( 0xC2 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xDF ) && ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ) ? 2 : 0 )\ : ((e)-(s) > 0) ? \ ( ( ((U8*)s)[0] & 0x80 ) == 0x00 ) \ : 0 ) -- 2.7.4