foldEQ_utf8_flags: Add no-mixing ASCII option

author Karl Williamson <public@khwilliamson.com>

Thu, 10 Feb 2011 04:41:34 +0000 (21:41 -0700)

committer Karl Williamson <public@khwilliamson.com>

Mon, 14 Feb 2011 15:41:39 +0000 (08:41 -0700)
author Karl Williamson <public@khwilliamson.com>
Thu, 10 Feb 2011 04:41:34 +0000 (21:41 -0700)
committer Karl Williamson <public@khwilliamson.com>
Mon, 14 Feb 2011 15:41:39 +0000 (08:41 -0700)
diff --git a/utf8.c b/utf8.c

index 4399fca..b5d8531 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -3033,6 +3033,13 @@ instead of upper/lowercasing both the characters, see
  http://www.unicode.org/unicode/reports/tr21/ (Case Mappings).
  
  =cut */
+
+/* A flags parameter has been added which may change, and hence isn't
+ * externally documented.  Currently it is:
+ *  0 for as-documented above
+ *  FOLDEQ_UTF8_NOMIX_ASCII meaning that an ASCII character never matches a
+ *                          non-ASCII one, even if the latter folds to ASCII
+ */
  I32
  Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const char *s2, char **pe2, register UV l2, bool u2, U32 flags)
  {
@@ -3100,7 +3107,16 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, register UV l1, bool u1
          /* If at the beginning of a new character in s1, get its fold to use
           * and the length of the fold */
          if (n1 == 0) {
-            if (u1) {
+           if (isASCII(*p1)) {
+
+               /* But fail if told not to mix ASCII with non-ASCII */
+               if ((flags & FOLDEQ_UTF8_NOMIX_ASCII) && ! isASCII(*p2)) {
+                   return 0;
+               }
+               n1 = 1;
+               *foldbuf1 = toLOWER(*p1);   /* ASCII range fold is lowercase */
+           }
+           else if (u1) {
                  to_utf8_fold(p1, foldbuf1, &n1);
              }
              else {  /* Not utf8, convert to it first and then get fold */
@@ -3111,7 +3127,14 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, register UV l1, bool u1
          }
  
          if (n2 == 0) {    /* Same for s2 */
-            if (u2) {
+           if (isASCII(*p2)) {
+               if (flags && ! isASCII(*p1)) {
+                   return 0;
+               }
+               n2 = 1;
+               *foldbuf2 = toLOWER(*p2);
+           }
+           else if (u2) {
                  to_utf8_fold(p2, foldbuf2, &n2);
              }
              else {
diff --git a/utf8.h b/utf8.h

index 2386eb6..e0dee68 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -22,6 +22,8 @@
  
  #define foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \
                     foldEQ_utf8_flags(s1, pe1, l1, u1, s2, pe2, l2, u2, 0)
+#define FOLDEQ_UTF8_NOMIX_ASCII (1 << 0)
+
  /*
  =for apidoc ibcmp_utf8
author	Karl Williamson <public@khwilliamson.com>
	Thu, 10 Feb 2011 04:41:34 +0000 (21:41 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Mon, 14 Feb 2011 15:41:39 +0000 (08:41 -0700)