From: Karl Williamson <public@khwilliamson.com>
Date: Fri, 8 Feb 2013 02:53:38 +0000 (-0700)
Subject: Use byte domain EBCDIC/LATIN1 macro where appropriate
X-Git-Tag: upstream/5.20.0~2089^2~83
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8e7c6e7db4e68025440138ddf851a3b4a61d8b5a;p=platform%2Fupstream%2Fperl.git

Use byte domain EBCDIC/LATIN1 macro where appropriate

Macros such as NATIVE_TO_UNI work on EBCDIC, but they operate on the
whole Unicode range.  In the locations affected by this commit, the
domain is known to be limited to a single byte, so the simpler macros
whose names contain LATIN1 may be used instead.

On ASCII platforms, all of these macros are no-ops (they expand to
their argument unchanged), so there is no effective change.
---

diff --git a/handy.h b/handy.h
index bc92ad9..144d2a1 100644
--- a/handy.h
+++ b/handy.h
@@ -850,7 +850,8 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
 #define FITS_IN_8_BITS(c) ((sizeof(c) == 1) || !(((WIDEST_UTYPE)(c)) & ~0xFF))
 
 #ifdef EBCDIC
-#   define isASCII(c)    (FITS_IN_8_BITS(c) && (NATIVE_TO_UNI((U8) (c)) < 128))
+#   define isASCII(c)    (FITS_IN_8_BITS(c)                      \
+                         && (NATIVE_TO_LATIN1((U8) (c)) < 128))
 #else
 #   define isASCII(c)    ((WIDEST_UTYPE)(c) < 128)
 #endif
@@ -989,7 +990,7 @@ EXTCONST U32 PL_charclass[];
     /* The 1U keeps Solaris from griping when shifting sets the uppermost bit */
 #   define _CC_mask(classnum) (1U << (classnum))
 #   define _generic_isCC(c, classnum) cBOOL(FITS_IN_8_BITS(c) \
-                && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask(classnum)))
+                && (PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask(classnum)))
 
     /* The mask for the _A versions of the macros; it just adds in the bit for
      * ASCII. */
@@ -998,7 +999,7 @@ EXTCONST U32 PL_charclass[];
     /* The _A version makes sure that both the desired bit and the ASCII bit
      * are present */
 #   define _generic_isCC_A(c, classnum) (FITS_IN_8_BITS(c) \
-        && ((PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask_A(classnum)) \
+        && ((PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask_A(classnum)) \
                                 == _CC_mask_A(classnum)))
 
 #   define isALPHA_A(c)  _generic_isCC_A(c, _CC_ALPHA)
@@ -1019,7 +1020,7 @@ EXTCONST U32 PL_charclass[];
 
     /* Either participates in a fold with a character above 255, or is a
      * multi-char fold */
-#   define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
+#   define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
 
 #   define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)
 #   define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
@@ -1094,19 +1095,19 @@ EXTCONST U32 PL_charclass[];
        * for backwards compatibility */
     /* ALPHAU includes Unicode semantics for latin1 characters.  It has an extra
      * >= AA test to speed up ASCII-only tests at the expense of the others */
-#   define isALPHA_L1(c) (isALPHA(c) || (NATIVE_TO_UNI((U8) c) >= 0xAA \
-	&& ((NATIVE_TO_UNI((U8) c) >= 0xC0 \
-             && NATIVE_TO_UNI((U8) c) != 0xD7 && NATIVE_TO_UNI((U8) c) != 0xF7) \
-	    || NATIVE_TO_UNI((U8) c) == 0xAA \
-	    || NATIVE_TO_UNI((U8) c) == 0xB5 \
-	    || NATIVE_TO_UNI((U8) c) == 0xBA)))
+#   define isALPHA_L1(c) (isALPHA(c) || (NATIVE_TO_LATIN1((U8) c) >= 0xAA \
+	&& ((NATIVE_TO_LATIN1((U8) c) >= 0xC0 \
+             && NATIVE_TO_LATIN1((U8) c) != 0xD7 && NATIVE_TO_LATIN1((U8) c) != 0xF7) \
+	    || NATIVE_TO_LATIN1((U8) c) == 0xAA \
+	    || NATIVE_TO_LATIN1((U8) c) == 0xB5 \
+	    || NATIVE_TO_LATIN1((U8) c) == 0xBA)))
 #   define isCHARNAME_CONT(c) (isWORDCHAR_L1(c)                         \
                                || (c) == ' '                            \
                                || (c) == '-'                            \
                                || (c) == '('                            \
                                || (c) == ')'                            \
                                || (c) == ':'                            \
-                               || NATIVE_TO_UNI((U8) c) == 0xA0)
+                               || NATIVE_TO_LATIN1((U8) c) == 0xA0)
 #endif
 
 /* Macros that differ between EBCDIC and ASCII.  Where C89 defines a function,
@@ -1118,7 +1119,7 @@ EXTCONST U32 PL_charclass[];
 #ifdef EBCDIC
 #   define isALPHA(c)	isalpha(c)
 #   define isALPHANUMERIC(c)	isalnum(c)
-#   define isBLANK(c)	((c) == ' ' || (c) == '\t' || NATIVE_TO_UNI(c) == 0xA0)
+#   define isBLANK(c)	((c) == ' ' || (c) == '\t' || NATIVE_TO_LATIN1(c) == 0xA0)
 #   define isCNTRL(c)	iscntrl(c)
 #   define isDIGIT(c)	isdigit(c)
 #   define isGRAPH(c)	isgraph(c)
@@ -1175,19 +1176,18 @@ EXTCONST U32 PL_charclass[];
 /* Use table lookup for speed; return error character for input
  * out-of-range */
 #define toLOWER_LATIN1(c)    ((! FITS_IN_8_BITS(c))                        \
-                             ? (c)                                           \
-                             : UNI_TO_NATIVE(PL_latin1_lc[                 \
-                                               NATIVE_TO_UNI( (U8) (c)) ]))
+                             ? (c)                                         \
+                             : LATIN1_TO_NATIVE(PL_latin1_lc[              \
+                                            NATIVE_TO_LATIN1( (U8) (c)) ]))
 #define toLOWER_L1(c)    toLOWER_LATIN1(c)  /* Synonym for consistency */
 
 /* Modified uc.  Is correct uc except for three non-ascii chars which are
  * all mapped to one of them, and these need special handling; error
  * character for input out-of-range */
 #define toUPPER_LATIN1_MOD(c) ((! FITS_IN_8_BITS(c))                       \
-                               ? (c)                                         \
-                               : UNI_TO_NATIVE(PL_mod_latin1_uc[           \
-                                               NATIVE_TO_UNI( (U8) (c)) ]))
-
+                               ? (c)                                       \
+                               : LATIN1_TO_NATIVE(PL_mod_latin1_uc[        \
+                                            NATIVE_TO_LATIN1( (U8) (c)) ]))
 #ifdef USE_NEXT_CTYPE
 
 #  define isALPHANUMERIC_LC(c)	NXIsAlNum((unsigned int)(c))
@@ -1498,7 +1498,7 @@ EXTCONST U32 PL_charclass[];
  * CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII, except that they don't
  * necessarily mean the same characters, e.g. CTRL-D is 4 on both systems, but
  * that is EOT on ASCII;  ST on EBCDIC */
-#  define toCTRL(c)    (toUPPER(NATIVE_TO_UNI(c)) ^ 64)
+#  define toCTRL(c)    (toUPPER(NATIVE_TO_LATIN1(c)) ^ 64)
 
 /* Line numbers are unsigned, 32 bits. */
 typedef U32 line_t;
diff --git a/regcomp.c b/regcomp.c
index 5a1e234..440d26a 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4276,8 +4276,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
 			    ANYOF_CLASS_CLEAR(data->start_class, classnum_to_namedclass(classnum) + 1);
                             for (value = 0; value < loop_max; value++) {
-                                if (! _generic_isCC(UNI_TO_NATIVE(value), classnum)) {
-                                    ANYOF_BITMAP_CLEAR(data->start_class, UNI_TO_NATIVE(value));
+                                if (! _generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
+                                    ANYOF_BITMAP_CLEAR(data->start_class, LATIN1_TO_NATIVE(value));
                                 }
                             }
 			}
@@ -4292,8 +4292,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
 			 * in case it isn't a true locale-node.  This will
 			 * create false positives if it truly is locale */
                         for (value = 0; value < loop_max; value++) {
-                            if (_generic_isCC(UNI_TO_NATIVE(value), classnum)) {
-                                ANYOF_BITMAP_SET(data->start_class, UNI_TO_NATIVE(value));
+                            if (_generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
+                                ANYOF_BITMAP_SET(data->start_class, LATIN1_TO_NATIVE(value));
                             }
                         }
                         }
@@ -4310,8 +4310,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
 			    ANYOF_CLASS_CLEAR(data->start_class, classnum_to_namedclass(classnum));
                             for (value = 0; value < loop_max; value++) {
-                                if (_generic_isCC(UNI_TO_NATIVE(value), classnum)) {
-                                    ANYOF_BITMAP_CLEAR(data->start_class, UNI_TO_NATIVE(value));
+                                if (_generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
+                                    ANYOF_BITMAP_CLEAR(data->start_class, LATIN1_TO_NATIVE(value));
                                 }
                             }
 			}
@@ -4326,8 +4326,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
 			 * case it isn't a true locale-node.  This will create
 			 * false positives if it truly is locale */
                         for (value = 0; value < loop_max; value++) {
-                            if (! _generic_isCC(UNI_TO_NATIVE(value), classnum)) {
-                                ANYOF_BITMAP_SET(data->start_class, UNI_TO_NATIVE(value));
+                            if (! _generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
+                                ANYOF_BITMAP_SET(data->start_class, LATIN1_TO_NATIVE(value));
                             }
                         }
                         if (PL_regkind[OP(scan)] == NPOSIXD) {
diff --git a/utf8.c b/utf8.c
index d33ff47..1bdad1b 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1413,7 +1413,7 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
 	p += 2;
 	if (uv < 0x80) {
 #ifdef EBCDIC
-	    *d++ = UNI_TO_NATIVE(uv);
+	    *d++ = LATIN1_TO_NATIVE(uv);
 #else
 	    *d++ = (U8)uv;
 #endif
@@ -4610,7 +4610,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
 		    to_utf8_fold(p1, foldbuf1, &n1);
 		}
 		else {  /* Not utf8, get utf8 fold */
-		    to_uni_fold(NATIVE_TO_UNI(*p1), foldbuf1, &n1);
+		    to_uni_fold(NATIVE_TO_LATIN1(*p1), foldbuf1, &n1);
 		}
 		f1 = foldbuf1;
 	    }
@@ -4655,7 +4655,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
 		    to_utf8_fold(p2, foldbuf2, &n2);
 		}
 		else {
-		    to_uni_fold(NATIVE_TO_UNI(*p2), foldbuf2, &n2);
+		    to_uni_fold(NATIVE_TO_LATIN1(*p2), foldbuf2, &n2);
 		}
 		f2 = foldbuf2;
 	    }