Use separate macros for byte vs uv Unicode

author Karl Williamson <public@khwilliamson.com>

Mon, 25 Mar 2013 19:09:09 +0000 (13:09 -0600)

committer Karl Williamson <public@khwilliamson.com>

Wed, 11 Sep 2013 03:02:58 +0000 (21:02 -0600)
author Karl Williamson <public@khwilliamson.com>
Mon, 25 Mar 2013 19:09:09 +0000 (13:09 -0600)
committer Karl Williamson <public@khwilliamson.com>
Wed, 11 Sep 2013 03:02:58 +0000 (21:02 -0600)
diff --git a/dist/IO/IO.pm b/dist/IO/IO.pm

index 35aba10..21583f5 100644 (file)
--- a/dist/IO/IO.pm
+++ b/dist/IO/IO.pm
@@ -7,7 +7,7 @@ use Carp;
  use strict;
  use warnings;
  
-our $VERSION = "1.28";
+our $VERSION = "1.29";
  XSLoader::load 'IO', $VERSION;
  
  sub import {
diff --git a/dist/IO/IO.xs b/dist/IO/IO.xs

index c603456..5ae41ae 100644 (file)
--- a/dist/IO/IO.xs
+++ b/dist/IO/IO.xs
@@ -337,7 +337,7 @@ ungetc(handle, c)
                  croak("Negative character number in ungetc()");
  
              v = SvUV(c);
-            if (NATIVE_IS_INVARIANT(v) || (v <= 0xFF && !PerlIO_isutf8(handle)))
+            if (UVCHR_IS_INVARIANT(v) || (v <= 0xFF && !PerlIO_isutf8(handle)))
                  RETVAL = PerlIO_ungetc(handle, (int)v);
              else {
                  U8 buf[UTF8_MAXBYTES + 1], *end;
diff --git a/doop.c b/doop.c

index ac11c73..5031af8 100644 (file)
--- a/doop.c
+++ b/doop.c
@@ -331,7 +331,7 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv)
         const U8 * const e = s + len;
         while (t < e) {
             const U8 ch = *t++;
-           hibit = !NATIVE_IS_INVARIANT(ch);
+           hibit = !NATIVE_BYTE_IS_INVARIANT(ch);
             if (hibit) {
                 s = bytes_to_utf8(s, &len);
                 break;
@@ -432,7 +432,7 @@ S_do_trans_count_utf8(pTHX_ SV * const sv)
         const U8 * const e = s + len;
         while (t < e) {
             const U8 ch = *t++;
-           hibit = !NATIVE_IS_INVARIANT(ch);
+           hibit = !NATIVE_BYTE_IS_INVARIANT(ch);
             if (hibit) {
                 start = s = bytes_to_utf8(s, &len);
                 break;
@@ -487,7 +487,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv)
         const U8 * const e = s + len;
         while (t < e) {
             const U8 ch = *t++;
-           hibit = !NATIVE_IS_INVARIANT(ch);
+           hibit = !NATIVE_BYTE_IS_INVARIANT(ch);
             if (hibit) {
                 s = bytes_to_utf8(s, &len);
                 break;
diff --git a/inline.h b/inline.h

index a2727f4..226970b 100644 (file)
--- a/inline.h
+++ b/inline.h
@@ -258,7 +258,7 @@ S_append_utf8_from_native_byte(const U8 byte, U8** dest)
  
      PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
  
-    if (NATIVE_IS_INVARIANT(byte))
+    if (NATIVE_BYTE_IS_INVARIANT(byte))
          *(*dest)++ = byte;
      else {
          *(*dest)++ = UTF8_EIGHT_BIT_HI(byte);
diff --git a/pp.c b/pp.c

index 6fc6c9f..860db37 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -4244,7 +4244,7 @@ PP(pp_fc)
                      for (; s < send; s++) {
                          STRLEN ulen;
                          UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags);
-                        if NATIVE_IS_INVARIANT(fc) {
+                        if UVCHR_IS_INVARIANT(fc) {
                              if (full_folding
                                  && *s == LATIN_SMALL_LETTER_SHARP_S)
                              {
diff --git a/pp_pack.c b/pp_pack.c

index 588e448..3c4e373 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -2003,7 +2003,7 @@ marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
      from_start = SvPVX_const(sv);
      from_end = from_start + SvCUR(sv);
      for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
-       if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
+       if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
      if (from_ptr == from_end) {
         /* Simple case: no character needs to be changed */
         SvUTF8_on(sv);
diff --git a/regcomp.c b/regcomp.c

index 3e6ec29..10b1aa3 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1792,7 +1792,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                     if ( !UTF ) {
                         /* store first byte of utf8 representation of
                            variant codepoints */
-                       if (! NATIVE_IS_INVARIANT(uvc)) {
+                       if (! UVCHR_IS_INVARIANT(uvc)) {
                             TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(uvc));
                         }
                     }
@@ -4976,7 +4976,7 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
      Newx(dst, *plen_p * 2 + 1, U8);
  
      while (s < *plen_p) {
-        if (NATIVE_IS_INVARIANT(src[s]))
+        if (NATIVE_BYTE_IS_INVARIANT(src[s]))
              dst[d]   = src[s];
          else {
              dst[d++] = UTF8_EIGHT_BIT_HI(src[s]);
@@ -11186,7 +11186,7 @@ tryagain:
                       * utf8.  If we start to fold non-UTF patterns, be sure to
                       * update join_exact() */
                      if (LOC && ender < 256) {
-                        if (NATIVE_IS_INVARIANT(ender)) {
+                        if (UVCHR_IS_INVARIANT(ender)) {
                              *s = (U8) ender;
                              foldlen = 1;
                          } else {
diff --git a/sv.c b/sv.c

index e7be001..a3c4752 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -3316,7 +3316,7 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr
  
         while (t < e) {
             const U8 ch = *t++;
-           if (NATIVE_IS_INVARIANT(ch)) continue;
+           if (NATIVE_BYTE_IS_INVARIANT(ch)) continue;
  
             t--;    /* t already incremented; re-point to first variant */
             two_byte_count = 1;
@@ -3451,7 +3451,7 @@ must_be_utf8:
  
                 while (d < e) {
                     const U8 chr = *d++;
-                   if (! NATIVE_IS_INVARIANT(chr)) two_byte_count++;
+                   if (! NATIVE_BYTE_IS_INVARIANT(chr)) two_byte_count++;
                 }
  
                 /* The string will expand by just the number of bytes that
@@ -3471,7 +3471,7 @@ must_be_utf8:
  
                 e--;
                 while (e >= t) {
-                   if (NATIVE_IS_INVARIANT(*e)) {
+                   if (NATIVE_BYTE_IS_INVARIANT(*e)) {
                         *d-- = *e;
                     } else {
                         *d-- = UTF8_EIGHT_BIT_LO(*e);
@@ -10879,7 +10879,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 goto unknown;
             uv = (args) ? va_arg(*args, int) : SvIV(argsv);
             if ((uv > 255 ||
-                (!NATIVE_IS_INVARIANT(uv) && SvUTF8(sv)))
+                (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
                 && !IN_BYTES) {
                 eptr = (char*)utf8buf;
                 elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf;
diff --git a/toke.c b/toke.c

index 968d30e..ef7d86b 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -3463,7 +3463,7 @@ S_scan_const(pTHX_ char *start)
                  * to recode the rest of the string into utf8 */
                 
                 /* Here uv is the ordinal of the next character being added */
-               if (!NATIVE_IS_INVARIANT(uv)) {
+               if (!UVCHR_IS_INVARIANT(uv)) {
                     if (!has_utf8 && uv > 255) {
                         /* Might need to recode whatever we have accumulated so
                          * far if it contains any chars variant in utf8 or
@@ -3797,7 +3797,7 @@ S_scan_const(pTHX_ char *start)
      default_action:
         /* If we started with encoded form, or already know we want it,
            then encode the next character */
-       if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
+       if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
             STRLEN len  = 1;
  
  
@@ -11886,7 +11886,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
             /* Append native character for the rev point */
             tmpend = uvchr_to_utf8(tmpbuf, rev);
             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
-           if (!NATIVE_IS_INVARIANT(rev))
+           if (!UVCHR_IS_INVARIANT(rev))
                  SvUTF8_on(sv);
             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
                  s = ++pos;
diff --git a/utf8.c b/utf8.c

index 51b9822..4745a63 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1693,7 +1693,7 @@ Perl__to_upper_title_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const char S_
  
      assert(S_or_s == 'S' || S_or_s == 's');
  
-    if (NATIVE_IS_INVARIANT(converted)) { /* No difference between the two for
+    if (UVCHR_IS_INVARIANT(converted)) { /* No difference between the two for
                                              characters in this range */
         *p = (U8) converted;
         *lenp = 1;
@@ -1794,7 +1794,7 @@ S_to_lower_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp)
      U8 converted = toLOWER_LATIN1(c);
  
      if (p != NULL) {
-       if (NATIVE_IS_INVARIANT(converted)) {
+       if (NATIVE_BYTE_IS_INVARIANT(converted)) {
             *p = converted;
             *lenp = 1;
         }
@@ -1864,7 +1864,7 @@ Perl__to_fold_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const unsigned int f
         converted = toLOWER_LATIN1(c);
      }
  
-    if (NATIVE_IS_INVARIANT(converted)) {
+    if (UVCHR_IS_INVARIANT(converted)) {
         *p = (U8) converted;
         *lenp = 1;
      }
diff --git a/utf8.h b/utf8.h

index 7036488..5880aa3 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -338,7 +338,12 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
   * UTF-8 encoded string) */
  #define UTF8_IS_INVARIANT(c)           UNI_IS_INVARIANT(NATIVE_UTF8_TO_I8(c))
  
-#define NATIVE_IS_INVARIANT(c)         UNI_IS_INVARIANT(NATIVE_TO_LATIN1(c))
+/* Like the above, but its name implies a non-UTF8 input, and is implemented
+ * differently (for no particular reason) */
+#define NATIVE_BYTE_IS_INVARIANT(c)    UNI_IS_INVARIANT(NATIVE_TO_LATIN1(c))
+
+/* Like the above, but accepts any UV as input */
+#define UVCHR_IS_INVARIANT(uv)          UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))
  
  #define MAX_PORTABLE_UTF8_TWO_BYTE 0x3FF    /* constrained by EBCDIC */
author	Karl Williamson <public@khwilliamson.com>
	Mon, 25 Mar 2013 19:09:09 +0000 (13:09 -0600)
committer	Karl Williamson <public@khwilliamson.com>
	Wed, 11 Sep 2013 03:02:58 +0000 (21:02 -0600)
dist/IO/IO.pm		patch | blob | history
dist/IO/IO.xs		patch | blob | history
doop.c		patch | blob | history
inline.h		patch | blob | history
pp.c		patch | blob | history
pp_pack.c		patch | blob | history
regcomp.c		patch | blob | history
sv.c		patch | blob | history
toke.c		patch | blob | history
utf8.c		patch | blob | history
utf8.h		patch | blob | history