From 01c5845aa92ba1bd86b3f470191149df4f878d3d Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 18 Aug 2012 12:19:00 -0600 Subject: [PATCH] regcomp.c: Special case /[UV_MAX]/ The highest code point representable on the machine has to be special cased. Earlier commits for 5.14 did this for ranges ending in this code point, but it turns out there needs to be a special-special case when the range consists of just that code point. --- regcomp.c | 12 +++++++++++- t/re/pat_advanced.t | 7 +++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/regcomp.c b/regcomp.c index b5ed584..eb7655e 100644 --- a/regcomp.c +++ b/regcomp.c @@ -7398,7 +7398,15 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV current = array[i]; if (current >= end) { /* Finished if beyond the end of what we are populating */ - return; + if (LIKELY(end < UV_MAX)) { + return; + } + + /* We get here when the upper bound is the maximum + * representable on the machine, and we are looking for just + * that code point. 
Have to special case it */ + i = len; + goto join_end_of_list; } } assert(current >= start); @@ -7415,6 +7423,8 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV swatch[offset >> 3] |= 1 << (offset & 7); } + join_end_of_list: + /* Quit if at the end of the list */ if (i >= len) { diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index 771e441..7408fdf 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -2178,6 +2178,9 @@ EOP "chr(0xFFFF_FFFE) can match a Unicode property"); ok(chr(0xFFFF_FFFF) =~ /\p{Is_32_Bit_Super}/, "chr(0xFFFF_FFFF) can match a Unicode property"); + my $p = qr/^[\x{FFFF_FFFF}]$/; + ok(chr(0xFFFF_FFFF) =~ $p, + "chr(0xFFFF_FFFF) can match itself in a [class]"); } else { no warnings 'overflow'; @@ -2186,6 +2189,10 @@ EOP ok(chr(0xFFFF_FFFF_FFFF_FFFF) =~ qr/^\p{Is_Portable_Super}$/, "chr(0xFFFF_FFFF_FFFF_FFFF) can match a Unicode property"); + my $p = qr/^[\x{FFFF_FFFF_FFFF_FFFF}]$/; + ok(chr(0xFFFF_FFFF_FFFF_FFFF) =~ $p, + "chr(0xFFFF_FFFF_FFFF_FFFF) can match itself in a [class]"); + # This test is because something was declared as 32 bits, but # should have been cast to 64; only a problem where # sizeof(STRLEN) != sizeof(UV) -- 2.7.4