Document the to_utf8_*() functions.

author Jarkko Hietaniemi <jhi@iki.fi>

Tue, 1 Jan 2002 17:53:44 +0000 (17:53 +0000)

committer Jarkko Hietaniemi <jhi@iki.fi>

Tue, 1 Jan 2002 17:53:44 +0000 (17:53 +0000)
author Jarkko Hietaniemi <jhi@iki.fi>
Tue, 1 Jan 2002 17:53:44 +0000 (17:53 +0000)
committer Jarkko Hietaniemi <jhi@iki.fi>
Tue, 1 Jan 2002 17:53:44 +0000 (17:53 +0000)
diff --git a/embed.pl b/embed.pl

index 32e7925..67d7d0c 100755 (executable)
--- a/embed.pl
+++ b/embed.pl
@@ -1827,10 +1827,10 @@ Ap      |void   |taint_env
  Ap     |void   |taint_proper   |const char* f|const char* s
  Apd    |UV     |to_utf8_case   |U8 *p|U8* ustrp|STRLEN *lenp \
                                 |SV **swash|char *normal|char *special
-Ap     |UV     |to_utf8_lower  |U8 *p|U8* ustrp|STRLEN *lenp
-Ap     |UV     |to_utf8_upper  |U8 *p|U8* ustrp|STRLEN *lenp
-Ap     |UV     |to_utf8_title  |U8 *p|U8* ustrp|STRLEN *lenp
-Ap     |UV     |to_utf8_fold   |U8 *p|U8* ustrp|STRLEN *lenp
+Apd    |UV     |to_utf8_lower  |U8 *p|U8* ustrp|STRLEN *lenp
+Apd    |UV     |to_utf8_upper  |U8 *p|U8* ustrp|STRLEN *lenp
+Apd    |UV     |to_utf8_title  |U8 *p|U8* ustrp|STRLEN *lenp
+Apd    |UV     |to_utf8_fold   |U8 *p|U8* ustrp|STRLEN *lenp
  #if defined(UNLINK_ALL_VERSIONS)
  Ap     |I32    |unlnk          |char* f
  #endif
diff --git a/pod/perlapi.pod b/pod/perlapi.pod

index 847bc77..6228c75 100644 (file)
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -4333,6 +4333,70 @@ to the hash is by Perl_to_utf8_case().
  =for hackers
  Found in file utf8.c
  
+=item to_utf8_fold
+
+Convert the UTF-8 encoded character at p to its foldcase version and
+store that in UTF-8 in ustrp and its length in bytes in lenp.  Note
+that the ustrp needs to be at least UTF8_MAXLEN_FOLD+1 bytes since the
+foldcase version may be longer than the original character (up to
+three characters).
+
+The first character of the foldcased version is returned
+(but note, as explained above, that there may be more characters).
+
+       UV      to_utf8_fold(U8 *p, U8* ustrp, STRLEN *lenp)
+
+=for hackers
+Found in file utf8.c
+
+=item to_utf8_lower
+
+Convert the UTF-8 encoded character at p to its lowercase version and
+store that in UTF-8 in ustrp and its length in bytes in lenp.  Note
+that the ustrp needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the
+lowercase version may be longer than the original character (up to two
+characters).
+
+The first character of the lowercased version is returned
+(but note, as explained above, that there may be more characters).
+
+       UV      to_utf8_lower(U8 *p, U8* ustrp, STRLEN *lenp)
+
+=for hackers
+Found in file utf8.c
+
+=item to_utf8_title
+
+Convert the UTF-8 encoded character at p to its titlecase version and
+store that in UTF-8 in ustrp and its length in bytes in lenp.  Note
+that the ustrp needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the
+titlecase version may be longer than the original character (up to two
+characters).
+
+The first character of the titlecased version is returned
+(but note, as explained above, that there may be more characters).
+
+       UV      to_utf8_title(U8 *p, U8* ustrp, STRLEN *lenp)
+
+=for hackers
+Found in file utf8.c
+
+=item to_utf8_upper
+
+Convert the UTF-8 encoded character at p to its uppercase version and
+store that in UTF-8 in ustrp and its length in bytes in lenp.  Note
+that the ustrp needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the
+uppercase version may be longer than the original character (up to two
+characters).
+
+The first character of the uppercased version is returned
+(but note, as explained above, that there may be more characters).
+
+       UV      to_utf8_upper(U8 *p, U8* ustrp, STRLEN *lenp)
+
+=for hackers
+Found in file utf8.c
+
  =item utf8n_to_uvchr
  
  Returns the native character value of the first character in the string C<s>
diff --git a/utf8.c b/utf8.c

index 296cb6a..500ac4b 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1315,7 +1315,7 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal
                    ustrp[1] = UTF8_EIGHT_BIT_LO(c);
                    *lenp = 2;
               }
-             return 0;
+             return utf8_to_uvchr(ustrp, 0);
          }
      }
      if (lenp)
@@ -1324,6 +1324,20 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal
      return uv;
  }
  
+/*
+=for apidoc A|UV|to_utf8_upper|U8 *p|U8 *ustrp|STRLEN *lenp
+
+Convert the UTF-8 encoded character at p to its uppercase version and
+store that in UTF-8 in ustrp and its length in bytes in lenp.  Note
+that the ustrp needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the
+uppercase version may be longer than the original character (up to two
+characters).
+
+The first character of the uppercased version is returned
+(but note, as explained above, that there may be more characters).
+
+=cut */
+
  UV
  Perl_to_utf8_upper(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
  {
@@ -1331,6 +1345,20 @@ Perl_to_utf8_upper(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
                               &PL_utf8_toupper, "ToUpper", "utf8::ToSpecUpper");
  }
  
+/*
+=for apidoc A|UV|to_utf8_title|U8 *p|U8 *ustrp|STRLEN *lenp
+
+Convert the UTF-8 encoded character at p to its titlecase version and
+store that in UTF-8 in ustrp and its length in bytes in lenp.  Note
+that the ustrp needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the
+titlecase version may be longer than the original character (up to two
+characters).
+
+The first character of the titlecased version is returned
+(but note, as explained above, that there may be more characters).
+
+=cut */
+
  UV
  Perl_to_utf8_title(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
  {
@@ -1338,6 +1366,20 @@ Perl_to_utf8_title(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
                               &PL_utf8_totitle, "ToTitle", "utf8::ToSpecTitle");
  }
  
+/*
+=for apidoc A|UV|to_utf8_lower|U8 *p|U8 *ustrp|STRLEN *lenp
+
+Convert the UTF-8 encoded character at p to its lowercase version and
+store that in UTF-8 in ustrp and its length in bytes in lenp.  Note
+that the ustrp needs to be at least UTF8_MAXLEN_UCLC+1 bytes since the
+lowercase version may be longer than the original character (up to two
+characters).
+
+The first character of the lowercased version is returned
+(but note, as explained above, that there may be more characters).
+
+=cut */
+
  UV
  Perl_to_utf8_lower(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
  {
@@ -1345,6 +1387,20 @@ Perl_to_utf8_lower(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
                               &PL_utf8_tolower, "ToLower", "utf8::ToSpecLower");
  }
  
+/*
+=for apidoc A|UV|to_utf8_fold|U8 *p|U8 *ustrp|STRLEN *lenp
+
+Convert the UTF-8 encoded character at p to its foldcase version and
+store that in UTF-8 in ustrp and its length in bytes in lenp.  Note
+that the ustrp needs to be at least UTF8_MAXLEN_FOLD+1 bytes since the
+foldcase version may be longer than the original character (up to
+three characters).
+
+The first character of the foldcased version is returned
+(but note, as explained above, that there may be more characters).
+
+=cut */
+
  UV
  Perl_to_utf8_fold(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp)
  {
author	Jarkko Hietaniemi <jhi@iki.fi>
	Tue, 1 Jan 2002 17:53:44 +0000 (17:53 +0000)
committer	Jarkko Hietaniemi <jhi@iki.fi>
	Tue, 1 Jan 2002 17:53:44 +0000 (17:53 +0000)
embed.pl		patch \| blob \| history
pod/perlapi.pod		patch \| blob \| history
utf8.c		patch \| blob \| history