Add and use macro to return EBCDIC

author Karl Williamson <public@khwilliamson.com>

Sun, 17 Feb 2013 19:46:05 +0000 (12:46 -0700)

committer Karl Williamson <public@khwilliamson.com>

Thu, 29 Aug 2013 15:55:52 +0000 (09:55 -0600)
author Karl Williamson <public@khwilliamson.com>
Sun, 17 Feb 2013 19:46:05 +0000 (12:46 -0700)
committer Karl Williamson <public@khwilliamson.com>
Thu, 29 Aug 2013 15:55:52 +0000 (09:55 -0600)
diff --git a/handy.h b/handy.h

index 144d2a1..73fdaca 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -1361,9 +1361,9 @@ EXTCONST U32 PL_charclass[];
                                           ? _generic_isCC(*(p), classnum)       \
                                           : (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
                                             ? _generic_isCC(                    \
-                                                   TWO_BYTE_UTF8_TO_UNI(*(p),  \
+                                                TWO_BYTE_UTF8_TO_NATIVE(*(p),  \
                                                                     *((p)+1 )), \
-                                                   classnum)                   \
+                                                classnum)                      \
                                             : utf8)
  /* Like the above, but calls 'above_latin1(p)' to get the utf8 value.  'above_latin1'
   * can be a macro */
@@ -1438,11 +1438,11 @@ EXTCONST U32 PL_charclass[];
   * use the value given by the 'utf8' parameter.  This relies on the fact that
   * ASCII characters have the same representation whether utf8 or not.  Note
   * that it assumes that the utf8 has been validated, and ignores 'use bytes' */
-#define _generic_LC_utf8(macro, p, utf8)                                   \
-                         (UTF8_IS_INVARIANT(*(p))                          \
-                         ? macro(*(p))                                     \
-                         : (UTF8_IS_DOWNGRADEABLE_START(*(p)))             \
-                           ? macro(TWO_BYTE_UTF8_TO_UNI(*(p), *((p)+1)))   \
+#define _generic_LC_utf8(macro, p, utf8)                                    \
+                         (UTF8_IS_INVARIANT(*(p))                           \
+                         ? macro(*(p))                                      \
+                         : (UTF8_IS_DOWNGRADEABLE_START(*(p)))              \
+                           ? macro(TWO_BYTE_UTF8_TO_NATIVE(*(p), *((p)+1))) \
                             : utf8)
  
  #define _generic_LC_swash_utf8(macro, classnum, p)                         \
diff --git a/pp.c b/pp.c

index 032b939..cd50626 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -4091,7 +4091,7 @@ PP(pp_quotemeta)
                     /* In locale, we quote all non-ASCII Latin1 chars.
                      * Otherwise use the quoting rules */
                     if (IN_LOCALE_RUNTIME
-                       || _isQUOTEMETA(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1))))
+                       || _isQUOTEMETA(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s + 1))))
                     {
                         to_quote = TRUE;
                     }
diff --git a/regcomp.c b/regcomp.c

index b830385..34aefa1 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -11284,8 +11284,8 @@ tryagain:
                              /* No Latin1 characters participate in multi-char
                               * folds under /l */
                              if (LOC
-                                || ! IS_NON_FINAL_FOLD(TWO_BYTE_UTF8_TO_UNI(
-                                                                *s, *(s+1))))
+                                || ! IS_NON_FINAL_FOLD(TWO_BYTE_UTF8_TO_NATIVE(
+                                                                  *s, *(s+1))))
                              {
                                  break;
                              }
diff --git a/regexec.c b/regexec.c

index 6a77019..384e4e7 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -484,7 +484,7 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
      }
      else if (UTF8_IS_DOWNGRADEABLE_START(*character)) {
          return isFOO_lc(classnum,
-                        TWO_BYTE_UTF8_TO_UNI(*character, *(character + 1)));
+                        TWO_BYTE_UTF8_TO_NATIVE(*character, *(character + 1)));
      }
  
      if (classnum < _FIRST_NON_SWASH_CC) {
@@ -1746,7 +1746,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                                                                  classnum)))
                          || (UTF8_IS_DOWNGRADEABLE_START(*s)
                              && to_complement ^ cBOOL(
-                                _generic_isCC(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1)),
+                                _generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(*s,
+                                                                      *(s + 1)),
                                                classnum))))
                      {
                          if (tmp && (reginfo->intuit || regtry(reginfo, &s)))
@@ -4153,7 +4154,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                              l++;
                          }
                          else {
-                            if (TWO_BYTE_UTF8_TO_UNI(*l, *(l+1)) != * (U8*) s) {
+                            if (TWO_BYTE_UTF8_TO_NATIVE(*l, *(l+1)) != * (U8*) s)
+                            {
                                  sayNO;
                              }
                              l += 2;
@@ -4176,7 +4178,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                              s++;
                          }
                          else {
-                            if (TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)) != * (U8*) l) {
+                            if (TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)) != * (U8*) l)
+                            {
                                  sayNO;
                              }
                              s += 2;
@@ -4391,7 +4394,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              }
              else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
                  if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan),
-                                        (U8) TWO_BYTE_UTF8_TO_UNI(nextchr,
+                                           (U8) TWO_BYTE_UTF8_TO_NATIVE(nextchr,
                                                              *(locinput + 1))))))
                  {
                      sayNO;
@@ -4472,9 +4475,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              }
              else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
                  if (! (to_complement
-                       ^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_UNI(nextchr,
+                       ^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(nextchr,
                                                                 *(locinput + 1)),
-                                              FLAGS(scan)))))
+                                             FLAGS(scan)))))
                  {
                      sayNO;
                  }
@@ -6860,7 +6863,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
                  /* Target isn't utf8; convert the character in the UTF-8
                   * pattern to non-UTF8, and do a simple loop */
-                c = TWO_BYTE_UTF8_TO_UNI(c, *(STRING(p) + 1));
+                c = TWO_BYTE_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
                  while (scan < loceol && UCHARAT(scan) == c) {
                      scan++;
                  }
@@ -7087,8 +7090,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      }
                      else if (UTF8_IS_DOWNGRADEABLE_START(*scan)) {
                          if (! (to_complement
-                              ^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_UNI(*scan,
-                                                                   *(scan + 1)),
+                              ^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(*scan,
+                                                                     *(scan + 1)),
                                                      classnum))))
                          {
                              break;
diff --git a/toke.c b/toke.c

index a7f7d88..ab62548 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1073,7 +1073,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                 }
                 else {
                      assert(p < e -1 );
-                   *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+                   *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
                     p += 2;
                  }
             }
@@ -2836,7 +2836,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
              }
              s++;
          } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-            if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
+            if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
                  goto bad_charname;
              }
              s += 2;
@@ -2869,8 +2869,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  s++;
              }
              else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-                if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
-                                                                    *(s+1)))))
+                if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
                  {
                      goto bad_charname;
                  }
diff --git a/utf8.c b/utf8.c

index b445a2e..2d827a1 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1204,7 +1204,7 @@ Perl_bytes_cmp_utf8(pTHX_ const U8 *b, STRLEN blen, const U8 *u, STRLEN ulen)
                 if (u < uend) {
                     U8 c1 = *u++;
                     if (UTF8_IS_CONTINUATION(c1)) {
-                       c = UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(c, c1));
+                       c = TWO_BYTE_UTF8_TO_NATIVE(c, c1);
                     } else {
                         Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
                                          "Malformed UTF-8 character "
@@ -1333,7 +1333,7 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *len, bool *is_utf8)
         U8 c = *s++;
         if (!UTF8_IS_INVARIANT(c)) {
             /* Then it is two-byte encoded */
-           c = UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(c, *s++));
+           c = TWO_BYTE_UTF8_TO_NATIVE(c, *s++);
         }
         *d++ = c;
      }
@@ -2578,10 +2578,10 @@ Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, const bool
      }
      else if UTF8_IS_DOWNGRADEABLE_START(*p) {
         if (flags) {
-           result = toUPPER_LC(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)));
+           result = toUPPER_LC(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)));
         }
         else {
-           return _to_upper_title_latin1(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)),
+           return _to_upper_title_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
                                           ustrp, lenp, 'S');
         }
      }
@@ -2644,10 +2644,10 @@ Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, const bool
      }
      else if UTF8_IS_DOWNGRADEABLE_START(*p) {
         if (flags) {
-           result = toUPPER_LC(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)));
+           result = toUPPER_LC(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)));
         }
         else {
-           return _to_upper_title_latin1(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)),
+           return _to_upper_title_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
                                           ustrp, lenp, 's');
         }
      }
@@ -2708,10 +2708,10 @@ Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, const bool
      }
      else if UTF8_IS_DOWNGRADEABLE_START(*p) {
         if (flags) {
-           result = toLOWER_LC(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)));
+           result = toLOWER_LC(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)));
         }
         else {
-           return to_lower_latin1(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)),
+           return to_lower_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
                                    ustrp, lenp);
         }
      }
@@ -2786,10 +2786,10 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags, b
      }
      else if UTF8_IS_DOWNGRADEABLE_START(*p) {
         if (flags & FOLD_FLAGS_LOCALE) {
-           result = toFOLD_LC(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)));
+           result = toFOLD_LC(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)));
         }
         else {
-           return _to_fold_latin1(TWO_BYTE_UTF8_TO_UNI(*p, *(p+1)),
+           return _to_fold_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
                              ustrp, lenp,
                              flags & (FOLD_FLAGS_FULL | FOLD_FLAGS_NOMIX_ASCII));
         }
@@ -4586,7 +4586,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
                         *foldbuf1 = *p1;
                     }
                     else {
-                       *foldbuf1 = TWO_BYTE_UTF8_TO_UNI(*p1, *(p1 + 1));
+                       *foldbuf1 = TWO_BYTE_UTF8_TO_NATIVE(*p1, *(p1 + 1));
                     }
                     n1 = 1;
                 }
@@ -4629,7 +4629,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
                         *foldbuf2 = *p2;
                     }
                     else {
-                       *foldbuf2 = TWO_BYTE_UTF8_TO_UNI(*p2, *(p2 + 1));
+                       *foldbuf2 = TWO_BYTE_UTF8_TO_NATIVE(*p2, *(p2 + 1));
                     }
  
                     /* Use another function to handle locale rules.  We've made
diff --git a/utf8.h b/utf8.h

index 4738648..bbbefde 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -302,14 +302,17 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
                                                 && ( (e) - (s) > 1)             \
                                                 && UTF8_IS_CONTINUATION(*((s)+1)))
  
-/* Convert a two (not one) byte utf8 character to a unicode code point value.
+/* Convert a two (not one) byte utf8 character to a native code point value.
   * Needs just one iteration of accumulate.  Should not be used unless it is
   * known that the two bytes are legal: 1) two-byte start, and 2) continuation.
   * Note that the result can be larger than 255 if the input character is not
   * downgradable */
-#define TWO_BYTE_UTF8_TO_UNI(HI, LO) \
-                   UTF8_ACCUMULATE((NATIVE_TO_UTF(HI) & UTF_START_MASK(2)), \
-                                    NATIVE_TO_UTF(LO))
+#define TWO_BYTE_UTF8_TO_NATIVE(HI, LO) \
+     UNI_TO_NATIVE(UTF8_ACCUMULATE((NATIVE_UTF8_TO_I8(HI) & UTF_START_MASK(2)), \
+                                   NATIVE_UTF8_TO_I8(LO)))
+
+/* Should never be used, and should be deprecated */
+#define TWO_BYTE_UTF8_TO_UNI(HI, LO) NATIVE_TO_UNI(TWO_BYTE_UTF8_TO_NATIVE(HI, LO))
  
  /* How many bytes in the UTF-8 encoded character whose first (perhaps only)
   * byte is pointed to by 's' */
author	Karl Williamson <public@khwilliamson.com>
	Sun, 17 Feb 2013 19:46:05 +0000 (12:46 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Thu, 29 Aug 2013 15:55:52 +0000 (09:55 -0600)
handy.h		patch \| blob \| history
pp.c		patch \| blob \| history
regcomp.c		patch \| blob \| history
regexec.c		patch \| blob \| history
toke.c		patch \| blob \| history
utf8.c		patch \| blob \| history
utf8.h		patch \| blob \| history