Remove EBCDIC remappings

author Karl Williamson <public@khwilliamson.com>

Sun, 17 Feb 2013 20:47:13 +0000 (13:47 -0700)

committer Karl Williamson <public@khwilliamson.com>

Thu, 29 Aug 2013 15:55:52 +0000 (09:55 -0600)
author Karl Williamson <public@khwilliamson.com>
Sun, 17 Feb 2013 20:47:13 +0000 (13:47 -0700)
committer Karl Williamson <public@khwilliamson.com>
Thu, 29 Aug 2013 15:55:52 +0000 (09:55 -0600)
diff --git a/handy.h b/handy.h

index 73fdaca..7b6db33 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -990,7 +990,7 @@ EXTCONST U32 PL_charclass[];
      /* The 1U keeps Solaris from griping when shifting sets the uppermost bit */
  #   define _CC_mask(classnum) (1U << (classnum))
  #   define _generic_isCC(c, classnum) cBOOL(FITS_IN_8_BITS(c) \
-                && (PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask(classnum)))
+                && (PL_charclass[(U8) (c)] & _CC_mask(classnum)))
  
      /* The mask for the _A versions of the macros; it just adds in the bit for
       * ASCII. */
@@ -999,7 +999,7 @@ EXTCONST U32 PL_charclass[];
      /* The _A version makes sure that both the desired bit and the ASCII bit
       * are present */
  #   define _generic_isCC_A(c, classnum) (FITS_IN_8_BITS(c) \
-        && ((PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask_A(classnum)) \
+        && ((PL_charclass[(U8) (c)] & _CC_mask_A(classnum)) \
                                  == _CC_mask_A(classnum)))
  
  #   define isALPHA_A(c)  _generic_isCC_A(c, _CC_ALPHA)
@@ -1020,7 +1020,7 @@ EXTCONST U32 PL_charclass[];
  
      /* Either participates in a fold with a character above 255, or is a
       * multi-char fold */
-#   define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_LATIN1(c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
+#   define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
  
  #   define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)
  #   define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
@@ -1177,8 +1177,7 @@ EXTCONST U32 PL_charclass[];
   * out-of-range */
  #define toLOWER_LATIN1(c)    ((! FITS_IN_8_BITS(c))                        \
                               ? (c)                                         \
-                             : LATIN1_TO_NATIVE(PL_latin1_lc[              \
-                                            NATIVE_TO_LATIN1( (U8) (c)) ]))
+                             : PL_latin1_lc[ (U8) (c) ])
  #define toLOWER_L1(c)    toLOWER_LATIN1(c)  /* Synonym for consistency */
  
  /* Modified uc.  Is correct uc except for three non-ascii chars which are
@@ -1186,8 +1185,7 @@ EXTCONST U32 PL_charclass[];
   * character for input out-of-range */
  #define toUPPER_LATIN1_MOD(c) ((! FITS_IN_8_BITS(c))                       \
                                 ? (c)                                       \
-                               : LATIN1_TO_NATIVE(PL_mod_latin1_uc[        \
-                                            NATIVE_TO_LATIN1( (U8) (c)) ]))
+                               : PL_mod_latin1_uc[ (U8) (c) ])
  #ifdef USE_NEXT_CTYPE
  
  #  define isALPHANUMERIC_LC(c) NXIsAlNum((unsigned int)(c))
diff --git a/perly.c b/perly.c

index d7d9ea3..5a934dc 100644 (file)
--- a/perly.c
+++ b/perly.c
@@ -342,9 +342,12 @@ Perl_yyparse (pTHX_ int gramtype)
         parser->yychar = yylex();
  #endif
  
+/* perly.tab is shipped based on an ASCII system; if it were to be regenerated
+ * on a platform that doesn't use ASCII, this translation back would need to be
+ * removed */
  #  ifdef EBCDIC
         if (parser->yychar >= 0 && parser->yychar < 255) {
-           parser->yychar = NATIVE_TO_ASCII(parser->yychar);
+           parser->yychar = NATIVE_TO_LATIN1(parser->yychar);
         }
  #  endif
      }
diff --git a/pp.c b/pp.c

index cd50626..111012d 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -4244,7 +4244,7 @@ PP(pp_fc)
                      for (; s < send; s++) {
                          STRLEN ulen;
                          UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags);
-                        if UNI_IS_INVARIANT(fc) {
+                        if NATIVE_IS_INVARIANT(fc) {
                              if (full_folding
                                  && *s == LATIN_SMALL_LETTER_SHARP_S)
                              {
diff --git a/regcomp.c b/regcomp.c

index 34aefa1..bb89a54 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1737,7 +1737,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                     if ( !UTF ) {
                         /* store first byte of utf8 representation of
                            variant codepoints */
-                       if (! UNI_IS_INVARIANT(uvc)) {
+                       if (! NATIVE_IS_INVARIANT(uvc)) {
                             TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(uvc));
                         }
                     }
@@ -4276,8 +4276,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                         if (!(data->start_class->flags & ANYOF_LOCALE)) {
                             ANYOF_CLASS_CLEAR(data->start_class, classnum_to_namedclass(classnum) + 1);
                              for (value = 0; value < loop_max; value++) {
-                                if (! _generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
-                                    ANYOF_BITMAP_CLEAR(data->start_class, LATIN1_TO_NATIVE(value));
+                                if (! _generic_isCC(value, classnum)) {
+                                    ANYOF_BITMAP_CLEAR(data->start_class, value);
                                  }
                              }
                         }
@@ -4292,8 +4292,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                          * in case it isn't a true locale-node.  This will
                          * create false positives if it truly is locale */
                          for (value = 0; value < loop_max; value++) {
-                            if (_generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
-                                ANYOF_BITMAP_SET(data->start_class, LATIN1_TO_NATIVE(value));
+                            if (_generic_isCC(value, classnum)) {
+                                ANYOF_BITMAP_SET(data->start_class, value);
                              }
                          }
                          }
@@ -4310,8 +4310,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                         if (!(data->start_class->flags & ANYOF_LOCALE)) {
                             ANYOF_CLASS_CLEAR(data->start_class, classnum_to_namedclass(classnum));
                              for (value = 0; value < loop_max; value++) {
-                                if (_generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
-                                    ANYOF_BITMAP_CLEAR(data->start_class, LATIN1_TO_NATIVE(value));
+                                if (_generic_isCC(value, classnum)) {
+                                    ANYOF_BITMAP_CLEAR(data->start_class, value);
                                  }
                              }
                         }
@@ -4326,8 +4326,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n",
                          * case it isn't a true locale-node.  This will create
                          * false positives if it truly is locale */
                          for (value = 0; value < loop_max; value++) {
-                            if (! _generic_isCC(LATIN1_TO_NATIVE(value), classnum)) {
-                                ANYOF_BITMAP_SET(data->start_class, LATIN1_TO_NATIVE(value));
+                            if (! _generic_isCC(value, classnum)) {
+                                ANYOF_BITMAP_SET(data->start_class, value);
                              }
                          }
                          if (PL_regkind[OP(scan)] == NPOSIXD) {
@@ -10183,7 +10183,7 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, I32
      if (! len_passed_in) {
          if (UTF) {
              if (FOLD && (! LOC || code_point > 255)) {
-                _to_uni_fold_flags(NATIVE_TO_UNI(code_point),
+                _to_uni_fold_flags(code_point,
                                     character,
                                     &len,
                                     FOLD_FLAGS_FULL | ((LOC)
diff --git a/regexec.c b/regexec.c

index 384e4e7..db6b730 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -1629,13 +1629,13 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
      case BOUNDL:
          RXp_MATCH_TAINTED_on(prog);
          FBC_BOUND(isWORDCHAR_LC,
-                  isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(tmp)),
+                  isWORDCHAR_LC_uvchr(tmp),
                    isWORDCHAR_LC_utf8((U8*)s));
          break;
      case NBOUNDL:
          RXp_MATCH_TAINTED_on(prog);
          FBC_NBOUND(isWORDCHAR_LC,
-                   isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(tmp)),
+                   isWORDCHAR_LC_uvchr(tmp),
                     isWORDCHAR_LC_utf8((U8*)s));
          break;
      case BOUND:
@@ -4305,7 +4305,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      }
                 }
                 else {
-                   ln = isWORDCHAR_LC_uvchr(UNI_TO_NATIVE(ln));
+                   ln = isWORDCHAR_LC_uvchr(ln);
                     n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_LC_utf8((U8*)locinput);
                 }
             }
diff --git a/utf8.c b/utf8.c

index 2d827a1..8d7e6de 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1407,11 +1407,7 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
         UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */
         p += 2;
         if (uv < 0x80) {
-#ifdef EBCDIC
-           *d++ = LATIN1_TO_NATIVE(uv);
-#else
             *d++ = (U8)uv;
-#endif
             continue;
         }
         if (uv < 0x800) {
@@ -1645,8 +1641,8 @@ Perl__to_upper_title_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const char S_
  
      assert(S_or_s == 'S' || S_or_s == 's');
  
-    if (UNI_IS_INVARIANT(converted)) { /* No difference between the two for
-                                         characters in this range */
+    if (NATIVE_IS_INVARIANT(converted)) { /* No difference between the two for
+                                            characters in this range */
         *p = (U8) converted;
         *lenp = 1;
         return converted;
@@ -1746,7 +1742,7 @@ S_to_lower_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp)
      U8 converted = toLOWER_LATIN1(c);
  
      if (p != NULL) {
-       if (UNI_IS_INVARIANT(converted)) {
+       if (NATIVE_IS_INVARIANT(converted)) {
             *p = converted;
             *lenp = 1;
         }
@@ -1816,7 +1812,7 @@ Perl__to_fold_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const unsigned int f
         converted = toLOWER_LATIN1(c);
      }
  
-    if (UNI_IS_INVARIANT(converted)) {
+    if (NATIVE_IS_INVARIANT(converted)) {
         *p = (U8) converted;
         *lenp = 1;
      }
@@ -1869,7 +1865,7 @@ bool
  Perl_is_uni_alnum_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isALNUM_LC(UNI_TO_NATIVE(c));
+        return isALNUM_LC(c);
      }
      return _is_uni_FOO(_CC_WORDCHAR, c);
  }
@@ -1878,7 +1874,7 @@ bool
  Perl_is_uni_alnumc_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isALPHANUMERIC_LC(UNI_TO_NATIVE(c));
+        return isALPHANUMERIC_LC(c);
      }
      return _is_uni_FOO(_CC_ALPHANUMERIC, c);
  }
@@ -1887,7 +1883,7 @@ bool
  Perl_is_uni_idfirst_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isIDFIRST_LC(UNI_TO_NATIVE(c));
+        return isIDFIRST_LC(c);
      }
      return _is_uni_perl_idstart(c);
  }
@@ -1896,7 +1892,7 @@ bool
  Perl_is_uni_alpha_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isALPHA_LC(UNI_TO_NATIVE(c));
+        return isALPHA_LC(c);
      }
      return _is_uni_FOO(_CC_ALPHA, c);
  }
@@ -1905,7 +1901,7 @@ bool
  Perl_is_uni_ascii_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isASCII_LC(UNI_TO_NATIVE(c));
+        return isASCII_LC(c);
      }
      return 0;
  }
@@ -1914,7 +1910,7 @@ bool
  Perl_is_uni_blank_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isBLANK_LC(UNI_TO_NATIVE(c));
+        return isBLANK_LC(c);
      }
      return isBLANK_uni(c);
  }
@@ -1923,7 +1919,7 @@ bool
  Perl_is_uni_space_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isSPACE_LC(UNI_TO_NATIVE(c));
+        return isSPACE_LC(c);
      }
      return isSPACE_uni(c);
  }
@@ -1932,7 +1928,7 @@ bool
  Perl_is_uni_digit_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isDIGIT_LC(UNI_TO_NATIVE(c));
+        return isDIGIT_LC(c);
      }
      return _is_uni_FOO(_CC_DIGIT, c);
  }
@@ -1941,7 +1937,7 @@ bool
  Perl_is_uni_upper_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isUPPER_LC(UNI_TO_NATIVE(c));
+        return isUPPER_LC(c);
      }
      return _is_uni_FOO(_CC_UPPER, c);
  }
@@ -1950,7 +1946,7 @@ bool
  Perl_is_uni_lower_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isLOWER_LC(UNI_TO_NATIVE(c));
+        return isLOWER_LC(c);
      }
      return _is_uni_FOO(_CC_LOWER, c);
  }
@@ -1959,7 +1955,7 @@ bool
  Perl_is_uni_cntrl_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isCNTRL_LC(UNI_TO_NATIVE(c));
+        return isCNTRL_LC(c);
      }
      return 0;
  }
@@ -1968,7 +1964,7 @@ bool
  Perl_is_uni_graph_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isGRAPH_LC(UNI_TO_NATIVE(c));
+        return isGRAPH_LC(c);
      }
      return _is_uni_FOO(_CC_GRAPH, c);
  }
@@ -1977,7 +1973,7 @@ bool
  Perl_is_uni_print_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isPRINT_LC(UNI_TO_NATIVE(c));
+        return isPRINT_LC(c);
      }
      return _is_uni_FOO(_CC_PRINT, c);
  }
@@ -1986,7 +1982,7 @@ bool
  Perl_is_uni_punct_lc(pTHX_ UV c)
  {
      if (c < 256) {
-        return isPUNCT_LC(UNI_TO_NATIVE(c));
+        return isPUNCT_LC(c);
      }
      return _is_uni_FOO(_CC_PUNCT, c);
  }
@@ -1995,7 +1991,7 @@ bool
  Perl_is_uni_xdigit_lc(pTHX_ UV c)
  {
      if (c < 256) {
-       return isXDIGIT_LC(UNI_TO_NATIVE(c));
+       return isXDIGIT_LC(c);
      }
      return isXDIGIT_uni(c);
  }
@@ -2382,13 +2378,8 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
                         SV **swashp, const char *normal, const char *special)
  {
      dVAR;
-    U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
      STRLEN len = 0;
-    const UV uv0 = valid_utf8_to_uvchr(p, NULL);
-    /* The NATIVE_TO_UNI() and UNI_TO_NATIVE() mappings
-     * are necessary in EBCDIC, they are redundant no-ops
-     * in ASCII-ish platforms, and hopefully optimized away. */
-    const UV uv1 = NATIVE_TO_UNI(uv0);
+    const UV uv1 = valid_utf8_to_uvchr(p, NULL);
  
      PERL_ARGS_ASSERT_TO_UTF8_CASE;
  
@@ -2414,8 +2405,6 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
          * be given */
      }
  
-    uvuni_to_utf8(tmpbuf, uv1);
-
      if (!*swashp) /* load on-demand */
           *swashp = _core_swash_init("utf8", normal, &PL_sv_undef, 4, 0, NULL, NULL);
  
@@ -2426,56 +2415,26 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
          SV **svp;
  
          if (hv &&
-            (svp = hv_fetch(hv, (const char*)tmpbuf, UNISKIP(uv1), FALSE)) &&
+            (svp = hv_fetch(hv, (const char*)p, UNISKIP(uv1), FALSE)) &&
              (*svp)) {
              const char *s;
  
               s = SvPV_const(*svp, len);
               if (len == 1)
+                  /* EIGHTBIT */
                    len = uvuni_to_utf8(ustrp, NATIVE_TO_UNI(*(U8*)s)) - ustrp;
               else {
-#ifdef EBCDIC
-                  /* If we have EBCDIC we need to remap the characters
-                   * since any characters in the low 256 are Unicode
-                   * code points, not EBCDIC. */
-                  U8 *t = (U8*)s, *tend = t + len, *d;
-               
-                  d = tmpbuf;
-                  if (SvUTF8(*svp)) {
-                       STRLEN tlen = 0;
-                       
-                       while (t < tend) {
-                            const UV c = utf8_to_uvchr_buf(t, tend, &tlen);
-                            if (tlen > 0) {
-                                 d = uvchr_to_utf8(d, UNI_TO_NATIVE(c));
-                                 t += tlen;
-                            }
-                            else
-                                 break;
-                       }
-                  }
-                  else {
-                       while (t < tend) {
-                            d = uvchr_to_utf8(d, UNI_TO_NATIVE(*t));
-                            t++;
-                       }
-                  }
-                  len = d - tmpbuf;
-                  Copy(tmpbuf, ustrp, len, U8);
-#else
                    Copy(s, ustrp, len, U8);
-#endif
               }
          }
      }
  
      if (!len && *swashp) {
-       const UV uv2 = swash_fetch(*swashp, tmpbuf, TRUE /* => is utf8 */);
+       const UV uv2 = swash_fetch(*swashp, p, TRUE /* => is utf8 */);
  
          if (uv2) {
               /* It was "normal" (a single character mapping). */
-             const UV uv3 = UNI_TO_NATIVE(uv2);
-             len = uvchr_to_utf8(ustrp, uv3) - ustrp;
+             len = uvchr_to_utf8(ustrp, uv2) - ustrp;
          }
      }
  
@@ -2496,7 +2455,7 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
      if (lenp)
          *lenp = len;
  
-    return uv0;
+    return uv1;
  
  }
  
@@ -3195,7 +3154,7 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8)
      U32 bit;
      SV *swatch;
      U8 tmputf8[2];
-    const UV c = NATIVE_TO_ASCII(*ptr);
+    const UV c = *ptr;
  
      PERL_ARGS_ASSERT_SWASH_FETCH;
  
@@ -3209,7 +3168,7 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8)
      }
  
      /* Convert to utf8 if not already */
-    if (!do_utf8 && !UNI_IS_INVARIANT(c)) {
+    if (!do_utf8 && !NATIVE_IS_INVARIANT(c)) {
         tmputf8[0] = (U8)UTF8_EIGHT_BIT_HI(c);
         tmputf8[1] = (U8)UTF8_EIGHT_BIT_LO(c);
         ptr = tmputf8;
@@ -4605,7 +4564,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
                     to_utf8_fold(p1, foldbuf1, &n1);
                 }
                 else {  /* Not utf8, get utf8 fold */
-                   to_uni_fold(NATIVE_TO_LATIN1(*p1), foldbuf1, &n1);
+                   to_uni_fold(*p1, foldbuf1, &n1);
                 }
                 f1 = foldbuf1;
             }
@@ -4650,7 +4609,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
                     to_utf8_fold(p2, foldbuf2, &n2);
                 }
                 else {
-                   to_uni_fold(NATIVE_TO_LATIN1(*p2), foldbuf2, &n2);
+                   to_uni_fold(*p2, foldbuf2, &n2);
                 }
                 f2 = foldbuf2;
             }
author	Karl Williamson <public@khwilliamson.com>
	Sun, 17 Feb 2013 20:47:13 +0000 (13:47 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Thu, 29 Aug 2013 15:55:52 +0000 (09:55 -0600)
handy.h		patch \| blob \| history
perly.c		patch \| blob \| history
pp.c		patch \| blob \| history
regcomp.c		patch \| blob \| history
regexec.c		patch \| blob \| history
utf8.c		patch \| blob \| history