From 1b9f127b4dff87fa8c6be8acf741af73f72fea81 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Nov 2010 09:42:48 -0700 Subject: [PATCH] Add Perl_foldEQ_latin1() This function compares two non-utf8 strings to see if they are equivalent without regard to case differences. It does not work on, nor check for, three problematic code points that require special handling: MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, and LATIN_SMALL_LETTER_Y_WITH_DIAERESIS. make regen required --- embed.fnc | 1 + embed.h | 1 + global.sym | 1 + proto.h | 7 +++++++ util.c | 21 +++++++++++++++++++++ 5 files changed, 31 insertions(+) diff --git a/embed.fnc b/embed.fnc index 9effd6b..fe8f43c 100644 --- a/embed.fnc +++ b/embed.fnc @@ -517,6 +517,7 @@ Am |I32 |ibcmp_utf8 |NN const char *s1|NULLOK char **pe1|UV l1 \ Apd |I32 |foldEQ_utf8 |NN const char *s1|NULLOK char **pe1|UV l1 \ |bool u1|NN const char *s2|NULLOK char **pe2 \ |UV l2|bool u2 +AnpP |I32 |foldEQ_latin1 |NN const char* a|NN const char* b|I32 len #if defined(PERL_IN_DOIO_C) sR |bool |ingroup |Gid_t testgid|bool effective #endif diff --git a/embed.h b/embed.h index 441b6a5..d484a10 100644 --- a/embed.h +++ b/embed.h @@ -132,6 +132,7 @@ #define find_rundefsv() Perl_find_rundefsv(aTHX) #define find_rundefsvoffset() Perl_find_rundefsvoffset(aTHX) #define foldEQ Perl_foldEQ +#define foldEQ_latin1 Perl_foldEQ_latin1 #define foldEQ_locale Perl_foldEQ_locale #define foldEQ_utf8(a,b,c,d,e,f,g,h) Perl_foldEQ_utf8(aTHX_ a,b,c,d,e,f,g,h) #ifndef PERL_IMPLICIT_CONTEXT diff --git a/global.sym b/global.sym index 007ed52..7e8f38b 100644 --- a/global.sym +++ b/global.sym @@ -128,6 +128,7 @@ Perl_find_runcv Perl_find_rundefsv Perl_find_rundefsvoffset Perl_foldEQ +Perl_foldEQ_latin1 Perl_foldEQ_locale Perl_foldEQ_utf8 Perl_form diff --git a/proto.h b/proto.h index 096e84f..a05f2b9 100644 --- a/proto.h +++ b/proto.h @@ -968,6 +968,13 @@ PERL_CALLCONV I32 Perl_foldEQ(const char* a, const char* b, I32 len) #define PERL_ARGS_ASSERT_FOLDEQ \ assert(a); 
assert(b) +PERL_CALLCONV I32 Perl_foldEQ_latin1(const char* a, const char* b, I32 len) + __attribute__pure__ + __attribute__nonnull__(1) + __attribute__nonnull__(2); +#define PERL_ARGS_ASSERT_FOLDEQ_LATIN1 \ + assert(a); assert(b) + PERL_CALLCONV I32 Perl_foldEQ_locale(const char* a, const char* b, I32 len) __attribute__pure__ __attribute__nonnull__(1) diff --git a/util.c b/util.c index f3c27f9..02861f0 100644 --- a/util.c +++ b/util.c @@ -930,6 +930,27 @@ Perl_foldEQ(const char *s1, const char *s2, register I32 len) } return 1; } +I32 +Perl_foldEQ_latin1(const char *s1, const char *s2, register I32 len) +{ + /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on + * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor + * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor + * does it check that the strings each have at least 'len' characters */ + + register const U8 *a = (const U8 *)s1; + register const U8 *b = (const U8 *)s2; + + PERL_ARGS_ASSERT_FOLDEQ_LATIN1; + + while (len--) { + if (*a != *b && *a != PL_fold_latin1[*b]) { + return 0; + } + a++, b++; + } + return 1; +} /* =for apidoc foldEQ_locale -- 2.7.4