}
}
+
+/* S_backref_value - turn the decimal digit string at p into a group number.
+ *
+ * Gives back atoi(p) when p starts with between one and nine digits; a
+ * number of that length always fits in 32 bits.  When p starts with no
+ * digits at all, or with ten or more, I32_MAX is handed back instead, so
+ * that an absurdly large number cannot wrap around in 32 bits and look
+ * like a small backref.  Callers compare the result against I32_MAX to
+ * detect that case. */
+
+static I32
+S_backref_value(char *p)
+{
+    char *end = p;
+
+    /* step over the leading run of digits */
+    while (isDIGIT(*end))
+        end++;
+
+    /* nothing to convert, or too long to sensibly be a backref */
+    if (end == p || end - p >= 10)
+        return I32_MAX;
+
+    return atoi(p);
+}
+
+
/*
- regatom - the lowest level
case '5': case '6': case '7': case '8': case '9':
{
I32 num;
- bool isg = *RExC_parse == 'g';
- bool isrel = 0;
bool hasbrace = 0;
- if (isg) {
+
+ if (*RExC_parse == 'g') {
+ bool isrel = 0;
+
RExC_parse++;
if (*RExC_parse == '{') {
RExC_parse++;
if (isrel) RExC_parse--;
RExC_parse -= 2;
goto parse_named_seq;
- } }
- num = atoi(RExC_parse);
- if (isg && num == 0) {
- if (*RExC_parse == '0') {
+ }
+
+ num = S_backref_value(RExC_parse);
+ if (num == 0)
vFAIL("Reference to invalid group 0");
+ else if (num == I32_MAX) {
+ if (isDIGIT(*RExC_parse))
+ vFAIL("Reference to nonexistent group");
+ else
+ vFAIL("Unterminated \\g... pattern");
}
- else {
- vFAIL("Unterminated \\g... pattern");
+
+ if (isrel) {
+ num = RExC_npar - num;
+ if (num < 1)
+ vFAIL("Reference to nonexistent or unclosed group");
}
}
- if (isrel) {
- num = RExC_npar - num;
- if (num < 1)
- vFAIL("Reference to nonexistent or unclosed group");
+ else {
+ num = S_backref_value(RExC_parse);
+ /* bare \NNN might be backref or octal */
+ if (num == I32_MAX || (num > 9 && num >= RExC_npar
+ && *RExC_parse != '8' && *RExC_parse != '9'))
+ /* Probably a character specified in octal, e.g. \35 */
+ goto defchar;
}
- if (!isg && num > 9 && num >= RExC_npar && *RExC_parse != '8' && *RExC_parse != '9')
- /* Probably a character specified in octal, e.g. \35 */
- goto defchar;
- else {
+
+ /* at this point RExC_parse definitely points to a backref
+ * number */
+ {
#ifdef RE_TRACK_PATTERN_OFFSETS
char * const parse_start = RExC_parse - 1; /* MJD */
#endif
* 118 OR as "\11" . "8" depending on whether there
* were 118 capture buffers defined already in the
* pattern. */
- if ( !isDIGIT(p[1]) || atoi(p) <= RExC_npar )
+ if ( !isDIGIT(p[1]) || S_backref_value(p) <= RExC_npar)
{ /* Not to be treated as an octal constant, go
find backref */
--p;
a\87 a87 c - Reference to nonexistent group in regex
a\97 a97 c - Reference to nonexistent group in regex
+# avoid problems with 32-bit signed integer overflow
+
+(.)\g2147483648} x c - Reference to nonexistent group in regex
+(.)\g2147483649} x c - Reference to nonexistent group in regex
+(.)\g2147483650} x c - Reference to nonexistent group in regex
+(.)\g4294967296} x c - Reference to nonexistent group in regex
+(.)\g4294967297} x c - Reference to nonexistent group in regex
+(.)\g4294967298} x c - Reference to nonexistent group in regex
+a(.)\g2147483648} x c - Reference to nonexistent group in regex
+a(.)\g2147483649} x c - Reference to nonexistent group in regex
+a(.)\g2147483650} x c - Reference to nonexistent group in regex
+a(.)\g4294967296} x c - Reference to nonexistent group in regex
+a(.)\g4294967297} x c - Reference to nonexistent group in regex
+a(.)\g4294967298} x c - Reference to nonexistent group in regex
+
+(.)\g{2147483648} x c - Reference to nonexistent group in regex
+(.)\g{2147483649} x c - Reference to nonexistent group in regex
+(.)\g{2147483650} x c - Reference to nonexistent group in regex
+(.)\g{4294967296} x c - Reference to nonexistent group in regex
+(.)\g{4294967297} x c - Reference to nonexistent group in regex
+(.)\g{4294967298} x c - Reference to nonexistent group in regex
+a(.)\g{2147483648} x c - Reference to nonexistent group in regex
+a(.)\g{2147483649} x c - Reference to nonexistent group in regex
+a(.)\g{2147483650} x c - Reference to nonexistent group in regex
+a(.)\g{4294967296} x c - Reference to nonexistent group in regex
+a(.)\g{4294967297} x c - Reference to nonexistent group in regex
+a(.)\g{4294967298} x c - Reference to nonexistent group in regex
+
+(.)\2147483648 b\o{214}7483648 y $1 b
+(.)\2147483649 b\o{214}7483649 y $1 b
+(.)\2147483650 b\o{214}7483650 y $1 b
+(.)\4294967296 b\o{42}94967296 y $1 b
+(.)\4294967297 b\o{42}94967297 y $1 b
+(.)\4294967298 b\o{42}94967298 y $1 b
+a(.)\2147483648 ab\o{214}7483648 y $1 b
+a(.)\2147483649 ab\o{214}7483649 y $1 b
+a(.)\2147483650 ab\o{214}7483650 y $1 b
+a(.)\4294967296 ab\o{42}94967296 y $1 b
+a(.)\4294967297 ab\o{42}94967297 y $1 b
+a(.)\4294967298 ab\o{42}94967298 y $1 b
+
# The below was inserting a NULL into the character class.
[\8\9] \000 Sn - -
[\8\9] - sc $& Unrecognized escape \\8 in character class