break;
}
}
-
- // Optimization: implicit anchoring.
- // If the expression begins with a .* loop, add an anchor to the beginning:
- // - If Singleline is set such that '.' eats anything, the .* will zip to the end of the string and then backtrack through
- // the whole thing looking for a match; since it will have examined everything, there's no benefit to examining it all
- // again, and we can anchor to beginning.
- // - If Singleline is not set, then '.' eats anything up until a '\n' and backtracks from there, so we can similarly avoid
- // re-examining that content and anchor to the beginning of lines.
- // We are currently very conservative here, only examining concat nodes. This could be loosened in the future, e.g. to
- // explore captures (but think through any implications of there being a back ref to that capture), to explore loops and
- // lazy loops a positive minimum (but the anchor shouldn't be part of the loop), to explore alternations and support adding
- // an anchor if all of them begin with appropriate star loops (though this could also be accomplished by factoring out the
- // loops to be before the alternation), etc.
- {
- RegexNode node = rootNode.Child(0); // skip implicit root capture node
- while (true)
- {
- bool singleline = (node.Options & RegexOptions.Singleline) != 0;
- switch (node.Type)
- {
- case Concatenate:
- node = node.Child(0);
- continue;
-
- case Setloop when singleline && node.N == int.MaxValue && node.Str == RegexCharClass.AnyClass:
- case Setloopatomic when singleline && node.N == int.MaxValue && node.Str == RegexCharClass.AnyClass:
- case Notoneloop when !singleline && node.N == int.MaxValue && node.Ch == '\n':
- case Notoneloopatomic when !singleline && node.N == int.MaxValue && node.Ch == '\n':
- RegexNode? parent = node.Next;
- var anchor = new RegexNode(singleline ? Beginning : Bol, node.Options);
- Debug.Assert(parent != null);
- if (parent.Type == Concatenate)
- {
- Debug.Assert(parent.ChildCount() >= 2);
- Debug.Assert(parent.Children is List<RegexNode>);
- anchor.Next = parent;
- ((List<RegexNode>)parent.Children).Insert(0, anchor);
- }
- else
- {
- Debug.Assert(parent.Type == Capture && parent.Next is null, "Only valid capture is the implicit root capture");
- var concat = new RegexNode(Concatenate, parent.Options);
- concat.AddChild(anchor);
- concat.AddChild(node);
- parent.ReplaceChild(0, concat);
- }
- break;
- }
-
- break;
- }
- }
}
// Optimization: Unnecessary root atomic.
yield return new object[] { @"(?>\w+)(?<!a)", "aa", RegexOptions.None, 0, 2, false, string.Empty };
yield return new object[] { @".+a", "baa", RegexOptions.None, 0, 3, true, "baa" };
yield return new object[] { @"[ab]+a", "cacbaac", RegexOptions.None, 0, 7, true, "baa" };
+ foreach (RegexOptions lineOption in new[] { RegexOptions.None, RegexOptions.Singleline, RegexOptions.Multiline })
+ {
+ yield return new object[] { @".*", "abc", lineOption, 1, 2, true, "bc" };
+ yield return new object[] { @".*c", "abc", lineOption, 1, 2, true, "bc" };
+ yield return new object[] { @"b.*", "abc", lineOption, 1, 2, true, "bc" };
+ yield return new object[] { @".*", "abc", lineOption, 2, 1, true, "c" };
+ }
// Using beginning/end of string chars \A, \Z: Actual - "\\Aaaa\\w+zzz\\Z"
yield return new object[] { @"\Aaaa\w+zzz\Z", "aaaasdfajsdlfjzzz", RegexOptions.IgnoreCase, 0, 17, true, "aaaasdfajsdlfjzzz" };
Assert.True(Regex.IsMatch(input, pattern));
}
+ // Note: this block will fail if any inputs attempt to look for anchors or lookbehinds at the initial position,
+ // as there is a difference between Match(input, beginning) and Match(input, beginning, input.Length - beginning)
+ // in that the former doesn't modify from 0 what the engine sees as the beginning of the input whereas the latter
+ // is equivalent to taking a substring and then matching on that. However, as we currently don't have any such inputs,
+ // it's currently a viable way to test the additional overload. Same goes for the similar case below with options.
if (beginning + length == input.Length)
{
// Use Match(string, int)
Assert.True(r.IsMatch(input, beginning));
}
- else
- {
- // Use Match(string, int, int)
- VerifyMatch(r.Match(input, beginning, length), true, expected);
- }
+
+ // Use Match(string, int, int)
+ VerifyMatch(r.Match(input, beginning, length), true, expected);
}
r = new Regex(pattern, options);
}
}
+ public static IEnumerable<object[]> Match_StartatDiffersFromBeginning_MemberData()
+ {
+ foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Singleline, RegexOptions.Multiline })
+ {
+ // Anchors
+ yield return new object[] { @"^.*", "abc", options, 0, true, true };
+ yield return new object[] { @"^.*", "abc", options, 1, false, true };
+
+ // Positive Lookbehinds
+ yield return new object[] { @"(?<=abc)def", "abcdef", options, 3, true, false };
+
+ // Negative Lookbehinds
+ yield return new object[] { @"(?<!abc)def", "abcdef", options, 3, false, true };
+ }
+ }
+
+ [Theory]
+ [MemberData(nameof(Match_StartatDiffersFromBeginning_MemberData))]
+ [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Match_StartatDiffersFromBeginning_MemberData), 2, MemberType = typeof(RegexCompilationHelper))]
+ public void Match_StartatDiffersFromBeginning(string pattern, string input, RegexOptions options, int startat, bool expectedSuccessStartAt, bool expectedSuccessBeginning)
+ {
+ var r = new Regex(pattern, options);
+
+ Assert.Equal(expectedSuccessStartAt, r.IsMatch(input, startat));
+ Assert.Equal(expectedSuccessStartAt, r.Match(input, startat).Success);
+
+ Assert.Equal(expectedSuccessBeginning, r.Match(input.Substring(startat)).Success);
+ Assert.Equal(expectedSuccessBeginning, r.Match(input, startat, input.Length - startat).Success);
+ }
+
private static void VerifyMatch(Match match, bool expectedSuccess, CaptureData[] expected)
{
Assert.Equal(expectedSuccess, match.Success);