Use byte domain EBCDIC/LATIN1 macro where appropriate

author Karl Williamson <public@khwilliamson.com>

Fri, 8 Feb 2013 02:53:38 +0000 (19:53 -0700)

committer Karl Williamson <public@khwilliamson.com>

Thu, 29 Aug 2013 15:55:49 +0000 (09:55 -0600)
author Karl Williamson <public@khwilliamson.com>
Fri, 8 Feb 2013 02:53:38 +0000 (19:53 -0700)
committer Karl Williamson <public@khwilliamson.com>
Thu, 29 Aug 2013 15:55:49 +0000 (09:55 -0600)
diff --git a/handy.h b/handy.h

index bc92ad9..144d2a1 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -850,7 +850,8 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
  #define FITS_IN_8_BITS(c) ((sizeof(c) == 1) || !(((WIDEST_UTYPE)(c)) & ~0xFF))
  
  #ifdef EBCDIC
-#   define isASCII(c)    (FITS_IN_8_BITS(c) && (NATIVE_TO_UNI((U8) (c)) < 128))
+#   define isASCII(c)    (FITS_IN_8_BITS(c)                      \
+                         && (NATIVE_TO_LATIN1((U8) (c)) < 128))
  #else
  #   define isASCII(c)    ((WIDEST_UTYPE)(c) < 128)
  #endif
@@ -989,7 +990,7 @@ EXTCONST U32 PL_charclass[];
      /* The 1U keeps Solaris from griping when shifting sets the uppermost bit */
  #   define _CC_mask(classnum) (1U << (classnum))
  #   define _generic_isCC(c, classnum) cBOOL(FITS_IN_8_BITS(c) \
-                && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask(classnum)))
+                && (PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask(classnum)))
  
      /* The mask for the _A versions of the macros; it just adds in the bit for
       * ASCII. */
@@ -998,7 +999,7 @@ EXTCONST U32 PL_charclass[];
      /* The _A version makes sure that both the desired bit and the ASCII bit
       * are present */
  #   define _generic_isCC_A(c, classnum) (FITS_IN_8_BITS(c) \
-        && ((PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask_A(classnum)) \
+        && ((PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask_A(classnum)) \
                                  == _CC_mask_A(classnum)))
  
  #   define isALPHA_A(c)  _generic_isCC_A(c, _CC_ALPHA)
@@ -1019,7 +1020,7 @@ EXTCONST U32 PL_charclass[];
  
      /* Either participates in a fold with a character above 255, or is a
       * multi-char fold */
-#   define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
+#   define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
  
  #   define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)
  #   define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
@@ -1094,19 +1095,19 @@ EXTCONST U32 PL_charclass[];
         * for backwards compatibility */
      /* ALPHAU includes Unicode semantics for latin1 characters.  It has an extra
       * >= AA test to speed up ASCII-only tests at the expense of the others */
-#   define isALPHA_L1(c) (isALPHA(c) || (NATIVE_TO_UNI((U8) c) >= 0xAA \
-       && ((NATIVE_TO_UNI((U8) c) >= 0xC0 \
-             && NATIVE_TO_UNI((U8) c) != 0xD7 && NATIVE_TO_UNI((U8) c) != 0xF7) \
-           || NATIVE_TO_UNI((U8) c) == 0xAA \
-           || NATIVE_TO_UNI((U8) c) == 0xB5 \
-           || NATIVE_TO_UNI((U8) c) == 0xBA)))
+#   define isALPHA_L1(c) (isALPHA(c) || (NATIVE_TO_LATIN1((U8) c) >= 0xAA \
+       && ((NATIVE_TO_LATIN1((U8) c) >= 0xC0 \
+             && NATIVE_TO_LATIN1((U8) c) != 0xD7 && NATIVE_TO_LATIN1((U8) c) != 0xF7) \
+           || NATIVE_TO_LATIN1((U8) c) == 0xAA \
+           || NATIVE_TO_LATIN1((U8) c) == 0xB5 \
+           || NATIVE_TO_LATIN1((U8) c) == 0xBA)))
  #   define isCHARNAME_CONT(c) (isWORDCHAR_L1(c)                         \
                                 || (c) == ' '                            \
                                 || (c) == '-'                            \
                                 || (c) == '('                            \
                                 || (c) == ')'                            \
                                 || (c) == ':'                            \
-                               || NATIVE_TO_UNI((U8) c) == 0xA0)
+                               || NATIVE_TO_LATIN1((U8) c) == 0xA0)
  #endif
  
  /* Macros that differ between EBCDIC and ASCII.  Where C89 defines a function,
@@ -1118,7 +1119,7 @@ EXTCONST U32 PL_charclass[];
  #ifdef EBCDIC
  #   define isALPHA(c)  isalpha(c)
  #   define isALPHANUMERIC(c)   isalnum(c)
-#   define isBLANK(c)  ((c) == ' ' || (c) == '\t' || NATIVE_TO_UNI(c) == 0xA0)
+#   define isBLANK(c)  ((c) == ' ' || (c) == '\t' || NATIVE_TO_LATIN1(c) == 0xA0)
  #   define isCNTRL(c)  iscntrl(c)
  #   define isDIGIT(c)  isdigit(c)
  #   define isGRAPH(c)  isgraph(c)
@@ -1175,19 +1176,18 @@ EXTCONST U32 PL_charclass[];
  /* Use table lookup for speed; return error character for input
   * out-of-range */
  #define toLOWER_LATIN1(c)    ((! FITS_IN_8_BITS(c))                        \
-                             ? (c)                                           \
-                             : UNI_TO_NATIVE(PL_latin1_lc[                 \
-                                               NATIVE_TO_UNI( (U8) (c)) ]))
+                             ? (c)                                         \
+                             : LATIN1_TO_NATIVE(PL_latin1_lc[              \
+                                            NATIVE_TO_LATIN1( (U8) (c)) ]))
  #define toLOWER_L1(c)    toLOWER_LATIN1(c)  /* Synonym for consistency */
  
  /* Modified uc.  Is correct uc except for three non-ascii chars which are
   * all mapped to one of them, and these need special handling; error
   * character for input out-of-range */
  #define toUPPER_LATIN1_MOD(c) ((! FITS_IN_8_BITS(c))                       \
-                               ? (c)                                         \
-                               : UNI_TO_NATIVE(PL_mod_latin1_uc[           \
-                                               NATIVE_TO_UNI( (U8) (c)) ]))
-
+                               ? (c)                                       \
+                               : LATIN1_TO_NATIVE(PL_mod_latin1_uc[        \
+                                            NATIVE_TO_LATIN1( (U8) (c)) ]))
  #ifdef USE_NEXT_CTYPE
  
  #  define isALPHANUMERIC_LC(c) NXIsAlNum((unsigned int)(c))
@@ -1498,7 +1498,7 @@ EXTCONST U32 PL_charclass[];
   * CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII, except that they don't
   * necessarily mean the same characters, e.g. CTRL-D is 4 on both systems, but
   * that is EOT on ASCII;  ST on EBCDIC */
-#  define toCTRL(c)    (toUPPER(NATIVE_TO_UNI(c)) ^ 64)
+#  define toCTRL(c)    (toUPPER(NATIVE_TO_LATIN1(c)) ^ 64)
  
  /* Line numbers are unsigned, 32 bits. */
  typedef U32 line_t;
diff --git a/regcomp.c b/regcomp.c

index 5a1e234..440d26a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -4276,8 +4276,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                         if (!(data->start_class->flags & ANYOF_LOCALE)) {
                             ANYOF_CLASS_CLEAR(data->start_class, classnum_to_namedclass(classnum) + 1);
                              for (value = 0; value < loop_max; value++) {
-                                if (! _generic_isCC(UNI_TO_NATIVE(value), classnum)) {
-                                    ANYOF_BITMAP_CLEAR(data->start_class, UNI_TO_NATIVE(value));
+                                if (! _generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
+                                    ANYOF_BITMAP_CLEAR(data->start_class, LATIN1_TO_NATIVE(value));
                                  }
                              }
                         }
@@ -4292,8 +4292,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                          * in case it isn't a true locale-node.  This will
                          * create false positives if it truly is locale */
                          for (value = 0; value < loop_max; value++) {
-                            if (_generic_isCC(UNI_TO_NATIVE(value), classnum)) {
-                                ANYOF_BITMAP_SET(data->start_class, UNI_TO_NATIVE(value));
+                            if (_generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
+                                ANYOF_BITMAP_SET(data->start_class, LATIN1_TO_NATIVE(value));
                              }
                          }
                          }
@@ -4310,8 +4310,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                         if (!(data->start_class->flags & ANYOF_LOCALE)) {
                             ANYOF_CLASS_CLEAR(data->start_class, classnum_to_namedclass(classnum));
                              for (value = 0; value < loop_max; value++) {
-                                if (_generic_isCC(UNI_TO_NATIVE(value), classnum)) {
-                                    ANYOF_BITMAP_CLEAR(data->start_class, UNI_TO_NATIVE(value));
+                                if (_generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
+                                    ANYOF_BITMAP_CLEAR(data->start_class, LATIN1_TO_NATIVE(value));
                                  }
                              }
                         }
@@ -4326,8 +4326,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                          * case it isn't a true locale-node.  This will create
                          * false positives if it truly is locale */
                          for (value = 0; value < loop_max; value++) {
-                            if (! _generic_isCC(UNI_TO_NATIVE(value), classnum)) {
-                                ANYOF_BITMAP_SET(data->start_class, UNI_TO_NATIVE(value));
+                            if (! _generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
+                                ANYOF_BITMAP_SET(data->start_class, LATIN1_TO_NATIVE(value));
                              }
                          }
                          if (PL_regkind[OP(scan)] == NPOSIXD) {
diff --git a/utf8.c b/utf8.c

index d33ff47..1bdad1b 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1413,7 +1413,7 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
         p += 2;
         if (uv < 0x80) {
  #ifdef EBCDIC
-           *d++ = UNI_TO_NATIVE(uv);
+           *d++ = LATIN1_TO_NATIVE(uv);
  #else
             *d++ = (U8)uv;
  #endif
@@ -4610,7 +4610,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
                     to_utf8_fold(p1, foldbuf1, &n1);
                 }
                 else {  /* Not utf8, get utf8 fold */
-                   to_uni_fold(NATIVE_TO_UNI(*p1), foldbuf1, &n1);
+                   to_uni_fold(NATIVE_TO_LATIN1(*p1), foldbuf1, &n1);
                 }
                 f1 = foldbuf1;
             }
@@ -4655,7 +4655,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
                     to_utf8_fold(p2, foldbuf2, &n2);
                 }
                 else {
-                   to_uni_fold(NATIVE_TO_UNI(*p2), foldbuf2, &n2);
+                   to_uni_fold(NATIVE_TO_LATIN1(*p2), foldbuf2, &n2);
                 }
                 f2 = foldbuf2;
             }
author	Karl Williamson <public@khwilliamson.com>
	Fri, 8 Feb 2013 02:53:38 +0000 (19:53 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Thu, 29 Aug 2013 15:55:49 +0000 (09:55 -0600)
handy.h		patch \| blob \| history
regcomp.c		patch \| blob \| history
utf8.c		patch \| blob \| history