From 6588300d7f2f5b968789662bcdf05c558384614c Mon Sep 17 00:00:00 2001 From: Nicholas Clark Date: Wed, 11 Sep 2013 12:18:43 +0100 Subject: [PATCH] The choice of 7 or 13 byte extended UTF-8 should be based on UVSIZE. Previously it was based on HAS_QUAD, which is not (as) correct. --- utf8.c | 4 ++-- utf8.h | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/utf8.c b/utf8.c index 2e157df..f07e8ec 100644 --- a/utf8.c +++ b/utf8.c @@ -190,7 +190,7 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) *d++ = (U8)(( uv & 0x3f) | 0x80); return d; } -#ifdef HAS_QUAD +#ifdef UTF8_QUAD_MAX if (uv < UTF8_QUAD_MAX) #endif { @@ -203,7 +203,7 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) *d++ = (U8)(( uv & 0x3f) | 0x80); return d; } -#ifdef HAS_QUAD +#ifdef UTF8_QUAD_MAX { *d++ = 0xff; /* Can't match U+FFFE! */ *d++ = 0x80; /* 6 Reserved bits */ diff --git a/utf8.h b/utf8.h index 5880aa3..76b89a4 100644 --- a/utf8.h +++ b/utf8.h @@ -233,7 +233,9 @@ Perl's extended UTF-8 means we can have start bytes up to FF. (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS) \ - UTF_ACCUMULATION_SHIFT)) -#ifdef HAS_QUAD +#if UVSIZE >= 8 +# define UTF8_QUAD_MAX UINT64_C(0x1000000000) + /* Input is a true Unicode (not-native) code point */ #define OFFUNISKIP(uv) ( (uv) < 0x80 ? 1 : \ (uv) < 0x800 ? 2 : \ @@ -521,10 +523,6 @@ Perl's extended UTF-8 means we can have start bytes up to FF. #define UNICODE_IS_SUPER(c) ((c) > PERL_UNICODE_MAX) #define UNICODE_IS_FE_FF(c) ((c) > 0x7FFFFFFF) -#ifdef HAS_QUAD -# define UTF8_QUAD_MAX UINT64_C(0x1000000000) -#endif - #define LATIN_SMALL_LETTER_SHARP_S LATIN_SMALL_LETTER_SHARP_S_NATIVE #define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS \ LATIN_SMALL_LETTER_Y_WITH_DIAERESIS_NATIVE -- 2.7.4