From 251b239f638a40fdf052716f0d8512fd4122ec8c Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 1 Mar 2014 18:47:10 -0700 Subject: [PATCH] Revert most of 3a8bbffbce: Avoid unnecessary malformed checking My thinking was muddled when I made that commit, and this reverts the essence of it. The theory was that since we have already processed the regex pattern, we don't need to check it for malformedness, hence we don't need the "safe" form of certain macros that check for and avoid running off the end of the buffer. It is true that we don't have to worry about malformedness indicating that the buffer is bigger than it really is, but these macros can match up to three well-formed characters, so we do have to make sure that all three are in the buffer, and that the input isn't just the first two at the buffer's end. This was caught by running valgrind. --- regcharclass.h | 318 +++++++++++++++++++++++++++++++++++++------------- regcomp.c | 8 +- regen/regcharclass.pl | 4 +- regexec.c | 4 +- 4 files changed, 243 insertions(+), 91 deletions(-) diff --git a/regcharclass.h b/regcharclass.h index 3c22657..7fe7235 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -585,88 +585,234 @@ &regcharclass_multi_char_folds::multi_char_folds(1) */ /*** GENERATED CODE ***/ -#define is_MULTI_CHAR_FOLD_utf8_part0(s) \ -( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xB1 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 ) - - -/*** GENERATED CODE ***/ -#define is_MULTI_CHAR_FOLD_utf8_part1(s) \ -( ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x87 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( 0x6A == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x8C == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x74 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 
2 : 0 )\ -: ( 0x74 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x88 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( 0x77 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0x79 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ?\ - ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x8A == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( 0xC5 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( ( 0xBF == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xC5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xBF == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ -: ( 0xCA == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0xBC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x6E == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ -: ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xFD ) == 0xAC ) ? \ - ( ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ - : ( 0xB1 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0xB7 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ?\ - ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ - ( ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? \ - ( ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[4]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[5]) ) ) ? 6 : 4 )\ - : 0 ) \ - : ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ - : ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ - ( ( 0x88 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? \ - ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ? \ - ( ( NATIVE_TO_LATIN1(((U8*)s)[5]) <= 0x81 ) ? 6 : 0 ) \ - : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[4]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[5]) ) ) ? 6 : 0 )\ - : 0 ) \ - : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ - : 0 ) \ -: ( 0xCF == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 
\ - ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x93 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ - : ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ - ( ( 0x88 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? \ - ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ? \ - ( ( NATIVE_TO_LATIN1(((U8*)s)[5]) <= 0x81 ) ? 6 : 0 ) \ - : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[4]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[5]) ) ) ? 6 : 0 )\ - : ( 0x93 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? \ - ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ? \ - ( ( NATIVE_TO_LATIN1(((U8*)s)[5]) <= 0x81 ) ? 6 : 4 ) \ - : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[4]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[5]) ) ) ? 6 : 4 )\ - : 0 ) \ - : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ - : ( 0x89 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ - ( ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? \ - ( ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[4]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[5]) ) ) ? 6 : 4 )\ - : 0 ) \ - : ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ - : ( ( ( 0x8E == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ -: ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0xA5 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( ( 0xD6 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ - : ( 0xB4 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[3]) & 0xF7 ) == 0xA5 ) || NATIVE_TO_LATIN1(((U8*)s)[3]) == 0xAB || NATIVE_TO_LATIN1(((U8*)s)[3]) == 0xB6 ) ) ? 4 : 0 )\ - : ( ( ( 0xBE == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xB6 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 
4 : 0 )\ -: ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0xBC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xD8 ) == 0x80 ) && ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ) ? 5 : 0 )\ - : ( ( ( ( 0xBD == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_TO_LATIN1(((U8*)s)[2]) == 0xBC ) ) && ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ) ? 5 : 0 )\ +#define is_MULTI_CHAR_FOLD_utf8_safe_part0(s,e) \ +( ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x74 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) + + +/*** GENERATED CODE ***/ +#define is_MULTI_CHAR_FOLD_utf8_safe_part1(s,e) \ +( ( 0x74 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x88 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x77 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0x79 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ?\ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x8A == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0xC5 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( ( 0xBF == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xC5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xBF == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xCA == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xBC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x6E == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xFD ) == 0xAC ) ? \ + ( ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xB1 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0xB7 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ?\ + ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ + ( ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? 
\ + ( ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[4]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[5]) ) ) ? 6 : 4 )\ + : 0 ) \ + : ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ + ( ( 0x88 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? \ + ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ? \ + ( ( ( NATIVE_TO_LATIN1(((U8*)s)[5]) & 0xFE ) == 0x80 ) ? 6 : 0 )\ + : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[4]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[5]) ) ) ? 6 : 0 )\ + : 0 ) \ + : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : 0 ) \ + : ( 0xCF == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x93 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ + ( ( 0x88 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? \ + ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ? \ + ( ( ( NATIVE_TO_LATIN1(((U8*)s)[5]) & 0xFE ) == 0x80 ) ? 6 : 0 )\ + : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[4]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[5]) ) ) ? 6 : 0 )\ + : ( 0x93 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? \ + ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ? \ + ( ( ( NATIVE_TO_LATIN1(((U8*)s)[5]) & 0xFE ) == 0x80 ) ? 6 : 4 )\ + : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[4]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[5]) ) ) ? 6 : 4 )\ + : 0 ) \ + : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0x89 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ + ( ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? \ + ( ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[4]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[5]) ) ) ? 
6 : 4 )\ + : 0 ) \ + : ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( ( ( 0x8E == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0xA5 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( 0xD6 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xB4 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[3]) & 0xF7 ) == 0xA5 ) || NATIVE_TO_LATIN1(((U8*)s)[3]) == 0xAB || NATIVE_TO_LATIN1(((U8*)s)[3]) == 0xB6 ) ) ? 4 : 0 )\ + : ( ( ( 0xBE == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xB6 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0xBC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xD8 ) == 0x80 ) && ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ) ? 5 : 0 )\ + : ( ( ( ( 0xBD == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_TO_LATIN1(((U8*)s)[2]) == 0xBC ) ) && ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ) ? 5 : 0 )\ + : 0 ) + + +/*** GENERATED CODE ***/ +#define is_MULTI_CHAR_FOLD_utf8_safe_part2(s,e) \ +( ( 0x61 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCA == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xBE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[2]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 
3 : 2 )\ + : ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )\ + : ( 0x68 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xB1 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x87 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x6A == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x8C == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x74 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )\ + : ( 0x74 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x88 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x77 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0x79 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ?\ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x8A == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0xC5 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( ( 0xBF == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xC5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xBF == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xCA == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xBC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x6E == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xFD ) == 0xAC ) ? \ + ( ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xB1 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0xB7 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ?\ + ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ + ( ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? 4 : 0 ) \ + : ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 
4 : 0 )\ + : ( ( ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xCF == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x93 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ + ( ( 0x93 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? 4 : 0 ) \ + : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0x89 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ + ( ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? 4 : 0 ) \ + : ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( ( ( 0x8E == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0xA5 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( 0xD6 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xB4 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[3]) & 0xF7 ) == 0xA5 ) || NATIVE_TO_LATIN1(((U8*)s)[3]) == 0xAB || NATIVE_TO_LATIN1(((U8*)s)[3]) == 0xB6 ) ) ? 4 : 0 )\ + : ( ( ( 0xBE == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xB6 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : 0 ) + + +/*** GENERATED CODE ***/ +#define is_MULTI_CHAR_FOLD_utf8_safe_part3(s,e) \ +( ((e)-(s) > 2) ? \ + ( ( 0x61 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCA == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xBE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 
\ + ( ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[2]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 2 )\ + : ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )\ + : ( 0x68 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xB1 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x87 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x6A == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x8C == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x74 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )\ + : ( 0x74 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x88 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x77 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0x79 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ?\ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x8A == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( ( ( 0xCA == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0xBC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( 0x6E == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ +: ((e)-(s) > 1) ? \ + ( ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x69 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )\ + : ( ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x74 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) ? 2 : 0 )\ : 0 ) /*** GENERATED CODE ***/ -#define is_MULTI_CHAR_FOLD_utf8(s) \ +#define is_MULTI_CHAR_FOLD_utf8_safe_part4(s,e) \ ( ( 0x61 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( ( 0xCA == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xBE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 
3 : 0 )\ -: ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ - ( ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[2]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 2 )\ - : ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )\ -: ( 0x68 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? is_MULTI_CHAR_FOLD_utf8_part0(s) : is_MULTI_CHAR_FOLD_utf8_part1(s) ) + ( ( ( 0xCA == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xBE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[2]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 2 )\ + : ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )\ + : ( 0x68 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xB1 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x87 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x6A == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x8C == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? is_MULTI_CHAR_FOLD_utf8_safe_part0(s,e) : is_MULTI_CHAR_FOLD_utf8_safe_part1(s,e) ) + + +/*** GENERATED CODE ***/ +#define is_MULTI_CHAR_FOLD_utf8_safe_part5(s,e) \ +( ((e)-(s) > 4) ? \ + ( ( 0x61 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCA == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xBE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0x66 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[2]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? 3 : 2 )\ + : ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x6C == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 
2 : 0 )\ + : ( 0x68 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xB1 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x69 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x87 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x6A == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x8C == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0x73 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0x74 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )\ + : ( 0x74 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x88 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0x77 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0x79 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ?\ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x8A == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0xC5 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( ( 0xBF == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xC5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xBF == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xCA == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( 0xBC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0x6E == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) ? 3 : 0 )\ + : ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xFD ) == 0xAC ) ? \ + ( ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xB1 == NATIVE_TO_LATIN1(((U8*)s)[1]) || 0xB7 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ?\ + ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ + ( ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? 4 : 0 ) \ + : ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 
4 : 0 )\ + : ( ( ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xCF == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0x81 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x93 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0xCC == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ + ( ( 0x93 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? 4 : 0 ) \ + : ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0x89 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( 0xCD == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ? \ + ( ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ? 4 : 0 ) \ + : ( ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( ( ( 0x8E == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0xA5 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( 0xD6 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( 0x82 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xB4 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[3]) & 0xF7 ) == 0xA5 ) || NATIVE_TO_LATIN1(((U8*)s)[3]) == 0xAB || NATIVE_TO_LATIN1(((U8*)s)[3]) == 0xB6 ) ) ? 4 : 0 )\ + : ( ( ( 0xBE == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( 0xD5 == NATIVE_TO_LATIN1(((U8*)s)[2]) ) ) && ( 0xB6 == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) ? 4 : 0 )\ + : ( 0xE1 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ + ( ( 0xBC == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? \ + ( ( ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xD8 ) == 0x80 ) && ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ) ? 
5 : 0 )\ + : ( ( ( ( 0xBD == NATIVE_TO_LATIN1(((U8*)s)[1]) ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_TO_LATIN1(((U8*)s)[2]) == 0xBC ) ) && ( 0xCE == NATIVE_TO_LATIN1(((U8*)s)[3]) ) ) && ( 0xB9 == NATIVE_TO_LATIN1(((U8*)s)[4]) ) ) ? 5 : 0 )\ + : 0 ) \ +: ((e)-(s) > 3) ? is_MULTI_CHAR_FOLD_utf8_safe_part2(s,e) : is_MULTI_CHAR_FOLD_utf8_safe_part3(s,e) ) + + +/*** GENERATED CODE ***/ +#define is_MULTI_CHAR_FOLD_utf8_safe(s,e) \ +( ((e)-(s) > 5) ? is_MULTI_CHAR_FOLD_utf8_safe_part4(s,e) : is_MULTI_CHAR_FOLD_utf8_safe_part5(s,e) ) /* MULTI_CHAR_FOLD: multi-char strings that are folded to by a single character @@ -674,12 +820,18 @@ &regcharclass_multi_char_folds::multi_char_folds(0) */ /*** GENERATED CODE ***/ -#define is_MULTI_CHAR_FOLD_latin1(s) \ -( ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0xDF ) == 0x46 ) ? \ - ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x46 ) ? \ - ( ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xDF ) == 0x49 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xDF ) == 0x4C ) ) ? 3 : 2 )\ - : ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x49 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x4C ) ) ? 2 : 0 )\ -: ( ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0xDF ) == 0x53 ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x53 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x54 ) ) ) ? 2 : 0 ) +#define is_MULTI_CHAR_FOLD_latin1_safe(s,e) \ +( ((e)-(s) > 2) ? \ + ( ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0xDF ) == 0x46 ) ? \ + ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x46 ) ? \ + ( ( ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xDF ) == 0x49 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xDF ) == 0x4C ) ) ? 3 : 2 )\ + : ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x49 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x4C ) ) ? 
2 : 0 )\ + : ( ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0xDF ) == 0x53 ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x53 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x54 ) ) ) ? 2 : 0 )\ +: ((e)-(s) > 1) ? \ + ( ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0xDF ) == 0x46 ) ? \ + ( ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x46 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x49 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x4C ) ) ? 2 : 0 )\ + : ( ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0xDF ) == 0x53 ) && ( ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x53 ) || ( ( NATIVE_TO_LATIN1(((U8*)s)[1]) & 0xDF ) == 0x54 ) ) ) ? 2 : 0 )\ +: 0 ) /* FOLDS_TO_MULTI: characters that fold to multi-char strings diff --git a/regcomp.c b/regcomp.c index 85077e6..91b0b90 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2109,7 +2109,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, foldlen -= UTF8SKIP(uc); } else { - foldlen = is_MULTI_CHAR_FOLD_utf8(uc); + foldlen = is_MULTI_CHAR_FOLD_utf8_safe(uc, e); minbytes++; } } @@ -2126,7 +2126,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, foldlen--; } else { - foldlen = is_MULTI_CHAR_FOLD_latin1(uc); + foldlen = is_MULTI_CHAR_FOLD_latin1_safe(uc, e); minbytes++; } } @@ -3365,7 +3365,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, length sequence we are looking for is 2 */ { int count = 0; /* How many characters in a multi-char fold */ - int len = is_MULTI_CHAR_FOLD_utf8(s); + int len = is_MULTI_CHAR_FOLD_utf8_safe(s, s_end); if (! len) { /* Not a multi-char fold: get next char */ s += UTF8SKIP(s); continue; @@ -3458,7 +3458,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, : s_end -1; while (s < upper) { - int len = is_MULTI_CHAR_FOLD_latin1(s); + int len = is_MULTI_CHAR_FOLD_latin1_safe(s, s_end); if (! len) { /* Not a multi-char fold. 
*/ if (*s == LATIN_SMALL_LETTER_SHARP_S && (OP(scan) == EXACTF || OP(scan) == EXACTFL)) diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl index 10e9504..f7422bf 100755 --- a/regen/regcharclass.pl +++ b/regen/regcharclass.pl @@ -1618,14 +1618,14 @@ QUOTEMETA: Meta-characters that \Q should quote \p{_Perl_Quotemeta} MULTI_CHAR_FOLD: multi-char strings that are folded to by a single character -=> UTF8 :fast +=> UTF8 :safe do regen/regcharclass_multi_char_folds.pl # 1 => All folds &regcharclass_multi_char_folds::multi_char_folds(1) MULTI_CHAR_FOLD: multi-char strings that are folded to by a single character -=> LATIN1 :fast +=> LATIN1 : safe &regcharclass_multi_char_folds::multi_char_folds(0) # 0 => Latin1-only diff --git a/regexec.c b/regexec.c index 907e662..e385fe7 100644 --- a/regexec.c +++ b/regexec.c @@ -3615,8 +3615,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, } } - if ((is_utf8_pat && is_MULTI_CHAR_FOLD_utf8(pat)) - || (!is_utf8_pat && is_MULTI_CHAR_FOLD_latin1(pat))) + if ((is_utf8_pat && is_MULTI_CHAR_FOLD_utf8_safe(pat, pat_end)) + || (!is_utf8_pat && is_MULTI_CHAR_FOLD_latin1_safe(pat, pat_end))) { /* Multi-character folds require more context to sort out. Also * PL_utf8_foldclosures used below doesn't handle them, so have to -- 2.7.4