Convert some uvuni() to uvchr()

author Karl Williamson <public@khwilliamson.com>

Sun, 24 Feb 2013 23:43:59 +0000 (16:43 -0700)

committer Karl Williamson <public@khwilliamson.com>

Thu, 29 Aug 2013 15:55:56 +0000 (09:55 -0600)
author Karl Williamson <public@khwilliamson.com>
Sun, 24 Feb 2013 23:43:59 +0000 (16:43 -0700)
committer Karl Williamson <public@khwilliamson.com>
Thu, 29 Aug 2013 15:55:56 +0000 (09:55 -0600)
diff --git a/cygwin/cygwin.c b/cygwin/cygwin.c

index 87401d1..e7be5e3 100644 (file)
--- a/cygwin/cygwin.c
+++ b/cygwin/cygwin.c
@@ -156,7 +156,7 @@ wide_to_utf8(const wchar_t *wbuf)
      char *oldlocale = setlocale(LC_CTYPE, NULL);
      setlocale(LC_CTYPE, "utf-8");
  
-    /* uvuni_to_utf8(buf, chr) or Encoding::_bytes_to_utf8(sv, "UCS-2BE"); */
+    /* uvchr_to_utf8(buf, chr) or Encoding::_bytes_to_utf8(sv, "UCS-2BE"); */
      wlen = wcsrtombs(NULL, (const wchar_t **)&wbuf, wlen, NULL);
      buf = (char *) safemalloc(wlen+1);
      wcsrtombs(buf, (const wchar_t **)&wbuf, wlen, NULL);
@@ -176,7 +176,7 @@ utf8_to_wide(const char *buf)
  
      setlocale(LC_CTYPE, "utf-8");
      wbuf = (wchar_t *) safemalloc(wlen);
-    /* utf8_to_uvuni_buf(pathname, pathname + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */
+    /* utf8_to_uvchr_buf(pathname, pathname + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */
      wlen = mbsrtowcs(wbuf, (const char**)&buf, wlen, &mbs);
  
      if (oldlocale) setlocale(LC_CTYPE, oldlocale);
@@ -283,7 +283,7 @@ XS(XS_Cygwin_win_to_posix_path)
             mbstate_t mbs;
              char *oldlocale = setlocale(LC_CTYPE, NULL);
              setlocale(LC_CTYPE, "utf-8");
-           /* utf8_to_uvuni_buf(src_path, src_path + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */
+           /* utf8_to_uvchr_buf(src_path, src_path + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */
             wlen = mbsrtowcs(wpath, (const char**)&src_path, wlen, &mbs);
             if (wlen > 0)
                 err = cygwin_conv_path(what, wpath, wbuf, wlen);
@@ -370,7 +370,7 @@ XS(XS_Cygwin_posix_to_win_path)
         setlocale(LC_CTYPE, "utf-8");
         if (!IN_BYTES) {
             mbstate_t mbs;
-           /* utf8_to_uvuni_buf(src_path, src_path + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */
+           /* utf8_to_uvchr_buf(src_path, src_path + wlen, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */
             wlen = mbsrtowcs(wpath, (const char**)&src_path, wlen, &mbs);
             if (wlen > 0)
                 err = cygwin_conv_path(what, wpath, wbuf, wlen);
diff --git a/doop.c b/doop.c

index 5a819a7..ac11c73 100644 (file)
--- a/doop.c
+++ b/doop.c
@@ -361,7 +361,7 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv)
         if (uv < none) {
             s += UTF8SKIP(s);
             matches++;
-           d = uvuni_to_utf8(d, uv);
+           d = uvchr_to_utf8(d, uv);
         }
         else if (uv == none) {
             const int i = UTF8SKIP(s);
@@ -372,7 +372,7 @@ S_do_trans_simple_utf8(pTHX_ SV * const sv)
         else if (uv == extra) {
             s += UTF8SKIP(s);
             matches++;
-           d = uvuni_to_utf8(d, final);
+           d = uvchr_to_utf8(d, final);
         }
         else
             s += UTF8SKIP(s);
@@ -532,7 +532,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv)
                 matches++;
                 s += UTF8SKIP(s);
                 if (uv != puv) {
-                   d = uvuni_to_utf8(d, uv);
+                   d = uvchr_to_utf8(d, uv);
                     puv = uv;
                 }
                 continue;
@@ -550,13 +550,13 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv)
                 if (havefinal) {
                     s += UTF8SKIP(s);
                     if (puv != final) {
-                       d = uvuni_to_utf8(d, final);
+                       d = uvchr_to_utf8(d, final);
                         puv = final;
                     }
                 }
                 else {
                     STRLEN len;
-                   uv = utf8n_to_uvuni(s, send - s, &len, UTF8_ALLOW_DEFAULT);
+                   uv = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT);
                     if (uv != puv) {
                         Move(s, d, len, U8);
                         d += len;
@@ -585,7 +585,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv)
             if (uv < none) {
                 matches++;
                 s += UTF8SKIP(s);
-               d = uvuni_to_utf8(d, uv);
+               d = uvchr_to_utf8(d, uv);
                 continue;
             }
             else if (uv == none) {      /* "none" is unmapped character */
@@ -598,7 +598,7 @@ S_do_trans_complex_utf8(pTHX_ SV * const sv)
             else if (uv == extra && !del) {
                 matches++;
                 s += UTF8SKIP(s);
-               d = uvuni_to_utf8(d, final);
+               d = uvchr_to_utf8(d, final);
                 continue;
             }
             matches++;                  /* "none+1" is delete character */
diff --git a/op.c b/op.c

index 7d8ac92..23eeaaa 100644 (file)
--- a/op.c
+++ b/op.c
@@ -4129,11 +4129,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
             i = 0;
             transv = newSVpvs("");
             while (t < tend) {
-               cp[2*i] = utf8n_to_uvuni(t, tend-t, &ulen, flags);
+               cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
                 t += ulen;
                 if (t < tend && *t == ILLEGAL_UTF8_BYTE) {
                     t++;
-                   cp[2*i+1] = utf8n_to_uvuni(t, tend-t, &ulen, flags);
+                   cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
                     t += ulen;
                 }
                 else {
@@ -4146,11 +4146,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                 UV  val = cp[2*j];
                 diff = val - nextmin;
                 if (diff > 0) {
-                   t = uvuni_to_utf8(tmpbuf,nextmin);
+                   t = uvchr_to_utf8(tmpbuf,nextmin);
                     sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
                     if (diff > 1) {
                         U8  range_mark = ILLEGAL_UTF8_BYTE;
-                       t = uvuni_to_utf8(tmpbuf, val - 1);
+                       t = uvchr_to_utf8(tmpbuf, val - 1);
                         sv_catpvn(transv, (char *)&range_mark, 1);
                         sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
                     }
@@ -4159,13 +4159,13 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                 if (val >= nextmin)
                     nextmin = val + 1;
             }
-           t = uvuni_to_utf8(tmpbuf,nextmin);
+           t = uvchr_to_utf8(tmpbuf,nextmin);
             sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
             {
                 U8 range_mark = ILLEGAL_UTF8_BYTE;
                 sv_catpvn(transv, (char *)&range_mark, 1);
             }
-           t = uvuni_to_utf8(tmpbuf, 0x7fffffff);
+           t = uvchr_to_utf8(tmpbuf, 0x7fffffff);
             sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
             t = (const U8*)SvPVX_const(transv);
             tlen = SvCUR(transv);
@@ -4186,11 +4186,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
         while (t < tend || tfirst <= tlast) {
             /* see if we need more "t" chars */
             if (tfirst > tlast) {
-               tfirst = (I32)utf8n_to_uvuni(t, tend - t, &ulen, flags);
+               tfirst = (I32)utf8n_to_uvchr(t, tend - t, &ulen, flags);
                 t += ulen;
                 if (t < tend && *t == ILLEGAL_UTF8_BYTE) {      /* illegal utf8 val indicates range */
                     t++;
-                   tlast = (I32)utf8n_to_uvuni(t, tend - t, &ulen, flags);
+                   tlast = (I32)utf8n_to_uvchr(t, tend - t, &ulen, flags);
                     t += ulen;
                 }
                 else
@@ -4200,11 +4200,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
             /* now see if we need more "r" chars */
             if (rfirst > rlast) {
                 if (r < rend) {
-                   rfirst = (I32)utf8n_to_uvuni(r, rend - r, &ulen, flags);
+                   rfirst = (I32)utf8n_to_uvchr(r, rend - r, &ulen, flags);
                     r += ulen;
                     if (r < rend && *r == ILLEGAL_UTF8_BYTE) {  /* illegal utf8 val indicates range */
                         r++;
-                       rlast = (I32)utf8n_to_uvuni(r, rend - r, &ulen, flags);
+                       rlast = (I32)utf8n_to_uvchr(r, rend - r, &ulen, flags);
                         r += ulen;
                     }
                     else
diff --git a/pp_pack.c b/pp_pack.c

index 39f862e..588e448 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -319,7 +319,7 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len
             const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
             for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
                 if (ptr >= end) break;
-               utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
+               utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
             }
             if (from > end) from = end;
         }
@@ -1316,10 +1316,10 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                     len = UTF8SKIP(result);
                     if (!uni_to_bytes(aTHX_ &ptr, strend,
                                       (char *) &result[1], len-1, 'U')) break;
-                   auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
+                   auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
                     s = ptr;
                 } else {
-                   auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
+                   auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
                     if (retlen == (STRLEN) -1 || retlen == 0)
                         Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                     s += retlen;
@@ -2585,8 +2585,8 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                         GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                         end = start+SvLEN(cat)-UTF8_MAXLEN;
                     }
-                   cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
-                                                      NATIVE_TO_UNI(auv),
+                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
+                                                      auv,
                                                        warn_utf8 ?
                                                        0 : UNICODE_ALLOW_ANY);
                 } else {
@@ -2639,7 +2639,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                 auv = SvUV(fromstr);
                 if (utf8) {
                     U8 buffer[UTF8_MAXLEN], *endb;
-                   endb = uvuni_to_utf8_flags(buffer, auv,
+                   endb = uvchr_to_utf8_flags(buffer, auv,
                                                warn_utf8 ?
                                                0 : UNICODE_ALLOW_ANY);
                     if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
@@ -2657,7 +2657,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                         GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                         end = start+SvLEN(cat)-UTF8_MAXLEN;
                     }
-                   cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
+                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
                                                        warn_utf8 ?
                                                        0 : UNICODE_ALLOW_ANY);
                 }
diff --git a/regcomp.c b/regcomp.c

index bb89a54..3bb13a9 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1443,7 +1443,7 @@ and would end up looking like:
     8: EXACT <baz>(10)
    10: END(0)
  
-    d = uvuni_to_utf8_flags(d, uv, 0);
+    d = uvchr_to_utf8_flags(d, uv, 0);
  
  is the recommended Unicode-aware way of saying
  
@@ -1455,7 +1455,7 @@ is the recommended Unicode-aware way of saying
         if (UTF) {                                                         \
              SV *zlopp = newSV(7); /* XXX: optimize me */                   \
             unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp);      \
-            unsigned const char *const kapow = uvuni_to_utf8(flrbbbbb, val); \
+            unsigned const char *const kapow = uvchr_to_utf8(flrbbbbb, val); \
             SvCUR_set(zlopp, kapow - flrbbbbb);                            \
             SvPOK_on(zlopp);                                               \
             SvUTF8_on(zlopp);                                              \
@@ -1470,12 +1470,12 @@ is the recommended Unicode-aware way of saying
      wordlen++;                                                                          \
      if ( UTF ) {                                                                        \
          /* if it is UTF then it is either already folded, or does not need folding */   \
-        uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags);             \
+        uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags);             \
      }                                                                                   \
      else if (folder == PL_fold_latin1) {                                                \
          /* if we use this folder we have to obey unicode rules on latin-1 data */       \
          if ( foldlen > 0 ) {                                                            \
-           uvc = utf8n_to_uvuni( (const U8*) scan, UTF8_MAXLEN, &len, uniflags );       \
+           uvc = utf8n_to_uvchr( (const U8*) scan, UTF8_MAXLEN, &len, uniflags );       \
             foldlen -= len;                                                              \
             scan += len;                                                                 \
             len = 0;                                                                     \
diff --git a/regexec.c b/regexec.c

index db6b730..ce22a00 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -1238,7 +1238,7 @@ STMT_START {                               \
      switch (trie_type) {                                                            \
      case trie_utf8_fold:                                                            \
          if ( foldlen>0 ) {                                                          \
-            uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
+            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
              foldlen -= len;                                                         \
              uscan += len;                                                           \
              len=0;                                                                  \
@@ -1252,7 +1252,7 @@ STMT_START {                               \
          break;                                                                      \
      case trie_latin_utf8_fold:                                                      \
          if ( foldlen>0 ) {                                                          \
-            uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
+            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
              foldlen -= len;                                                         \
              uscan += len;                                                           \
              len=0;                                                                  \
@@ -1265,7 +1265,7 @@ STMT_START {                               \
          }                                                                           \
          break;                                                                      \
      case trie_utf8:                                                                 \
-        uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags );        \
+        uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags );        \
          break;                                                                      \
      case trie_plain:                                                                \
          uvc = (UV)*uc;                                                              \
@@ -4050,7 +4050,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
                     while (chars) {
                         if (utf8_target) {
-                           uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len,
+                           uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len,
                                                     uniflags);
                             uc += len;
                         }
@@ -4063,7 +4063,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                         while (foldlen) {
                             if (!--chars)
                                 break;
-                           uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len,
+                           uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len,
                                             uniflags);
                             uscan += len;
                             foldlen -= len;
diff --git a/toke.c b/toke.c

index 7b397e3..53f1f4f 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1053,7 +1053,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                     ENTER;
                     SAVESPTR(PL_warnhook);
                     PL_warnhook = PERL_WARNHOOK_FATAL;
-                   utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+                   utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
                     LEAVE;
                 }
             }
@@ -1437,13 +1437,13 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
                 bufend = PL_parser->bufend;
             }
         }
-       unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+       unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
         if (retlen == (STRLEN)-1) {
             /* malformed UTF-8 */
             ENTER;
             SAVESPTR(PL_warnhook);
             PL_warnhook = PERL_WARNHOOK_FATAL;
-           utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+           utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
             LEAVE;
         }
         return unichar;
@@ -2761,7 +2761,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
      {
          /* If warnings are on, this will print a more detailed analysis of what
           * is wrong than the error message below */
-        utf8n_to_uvuni(first_bad_char_loc,
+        utf8n_to_uvchr(first_bad_char_loc,
                         e - ((char *) first_bad_char_loc),
                         NULL, 0);
  
@@ -2903,7 +2903,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
              /* If warnings are on, this will print a more detailed analysis of
               * what is wrong than the error message below */
-            utf8n_to_uvuni(first_bad_char_loc,
+            utf8n_to_uvchr(first_bad_char_loc,
                             (char *) first_bad_char_loc - str,
                             NULL, 0);
  
@@ -3442,7 +3442,7 @@ S_scan_const(pTHX_ char *start)
                      }
  
                      if (has_utf8) {
-                       d = (char*)uvuni_to_utf8((U8*)d, uv);
+                       d = (char*)uvchr_to_utf8((U8*)d, uv);
                         if (PL_lex_inwhat == OP_TRANS &&
                             PL_sublex_info.sub_op) {
                             PL_sublex_info.sub_op->op_private |=
diff --git a/utf8.c b/utf8.c

index 8d7e6de..3981fe8 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -2422,7 +2422,7 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
               s = SvPV_const(*svp, len);
               if (len == 1)
                    /* EIGHTBIT */
-                  len = uvuni_to_utf8(ustrp, NATIVE_TO_UNI(*(U8*)s)) - ustrp;
+                  len = uvchr_to_utf8(ustrp, *(U8*)s) - ustrp;
               else {
                    Copy(s, ustrp, len, U8);
               }
@@ -3216,10 +3216,7 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8)
         /* If not cached, generate it via swatch_get */
         if (!svp || !SvPOK(*svp)
                  || !(tmps = (const U8*)SvPV_const(*svp, slen))) {
-           /* We use utf8n_to_uvuni() as we want an index into
-              Unicode tables, not a native character number.
-            */
-           const UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXBYTES, 0,
+           const UV code_point = utf8n_to_uvchr(ptr, UTF8_MAXBYTES, 0,
                                            ckWARN(WARN_UTF8) ?
                                            0 : UTF8_ALLOW_ANY);
             swatch = swatch_get(swash,
@@ -3904,7 +3901,7 @@ Perl__swash_inversion_hash(pTHX_ SV* const swash)
  
             /* The key is the inverse mapping */
             char key[UTF8_MAXBYTES+1];
-           char* key_end = (char *) uvuni_to_utf8((U8*) key, val);
+           char* key_end = (char *) uvchr_to_utf8((U8*) key, val);
             STRLEN key_len = key_end - key;
  
             /* Get the list for the map */
author	Karl Williamson <public@khwilliamson.com>
	Sun, 24 Feb 2013 23:43:59 +0000 (16:43 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Thu, 29 Aug 2013 15:55:56 +0000 (09:55 -0600)
cygwin/cygwin.c		patch | blob | history
doop.c		patch | blob | history
op.c		patch | blob | history
pp_pack.c		patch | blob | history
regcomp.c		patch | blob | history
regexec.c		patch | blob | history
toke.c		patch | blob | history
utf8.c		patch | blob | history