Rename "Greedy" subexpressions to "Atomic"
author Stephen Toub <stoub@microsoft.com>
Mon, 6 Jan 2020 21:43:05 +0000 (16:43 -0500)
committer Stephen Toub <stoub@microsoft.com>
Thu, 9 Jan 2020 03:50:10 +0000 (22:50 -0500)
The greedy naming is really confusing (we should fix it in the framework documentation as well).

In general regular expression nomenclature, "greedy" is used as the opposite of "lazy", indicating how much a loop initially consumes, e.g. does `a*` first try consuming as many 'a's as possible or does it first try consuming as few 'a's as possible... it's "greedy" (denoted as `a*`) if it consumes as many as possible and "lazy" (denoted as `a*?`) if it consumes as few.

How aggressively it consumes, however, is orthogonal to whether it backtracks.  Whereas `a*` is greedy and backtracking and `a*?` is lazy and backtracking, `(?>a*)` is greedy and non-backtracking and `(?>a*?)` is lazy and non-backtracking.

Unfortunately, the nomenclature in the implementation and the documentation describes the `(?> ... )` as being a "greedy subexpression", which then conflates the meaning of "greedy".

The rest of the industry refers to these instead as "atomic", so I've changed it to that in the implementation.

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs

index bc4e4cb..20eefbc 100644 (file)
@@ -84,9 +84,9 @@ namespace System.Text.RegularExpressions
         // Manufactured primitive operations, derived from the tree that comes from the parser.
         // These exist to reduce backtracking (both actually performing it and spitting code for it).
 
-        public const int Oneloopgreedy = 43;      // lef,back char,min,max    (?> a {,n} )
-        public const int Notoneloopgreedy = 44;   // lef,back set,min,max     (?> . {,n} )
-        public const int Setloopgreedy = 45;      // lef,back set,min,max     (?> [\d]{,n} )
+        public const int Oneloopatomic = 43;      // lef,back char,min,max    (?> a {,n} )
+        public const int Notoneloopatomic = 44;   // lef,back set,min,max     (?> . {,n} )
+        public const int Setloopatomic = 45;      // lef,back set,min,max     (?> [\d]{,n} )
 
         // Modifiers for alternate modes
         public const int Mask = 63;   // Mask to get unmodified ordinary operator
@@ -206,15 +206,15 @@ namespace System.Text.RegularExpressions
                 case Onerep:
                 case Notonerep:
                 case Oneloop:
-                case Oneloopgreedy:
+                case Oneloopatomic:
                 case Notoneloop:
-                case Notoneloopgreedy:
+                case Notoneloopatomic:
                 case Onelazy:
                 case Notonelazy:
                 case Setlazy:
                 case Setrep:
                 case Setloop:
-                case Setloopgreedy:
+                case Setloopatomic:
                     return 3;
 
                 default:
@@ -238,7 +238,7 @@ namespace System.Text.RegularExpressions
             "Setjump", "Backjump", "Forejump", "Testref", "Goto",
             "Prune", "Stop",
             "ECMABoundary", "NonECMABoundary",
-            "Oneloopgreedy", "Notoneloopgreedy", "Setloopgreedy"
+            "Oneloopatomic", "Notoneloopatomic", "Setloopatomic"
         };
 
         private static string OperatorDescription(int Opcode)
@@ -275,9 +275,9 @@ namespace System.Text.RegularExpressions
                 case Onerep:
                 case Notonerep:
                 case Oneloop:
-                case Oneloopgreedy:
+                case Oneloopatomic:
                 case Notoneloop:
-                case Notoneloopgreedy:
+                case Notoneloopatomic:
                 case Onelazy:
                 case Notonelazy:
                     sb.Append("Ch = ");
@@ -287,7 +287,7 @@ namespace System.Text.RegularExpressions
                 case Set:
                 case Setrep:
                 case Setloop:
-                case Setloopgreedy:
+                case Setloopatomic:
                 case Setlazy:
                     sb.Append("Set = ");
                     sb.Append(RegexCharClass.SetDescription(Strings[Codes[offset + 1]]));
@@ -336,14 +336,14 @@ namespace System.Text.RegularExpressions
                 case Onerep:
                 case Notonerep:
                 case Oneloop:
-                case Oneloopgreedy:
+                case Oneloopatomic:
                 case Notoneloop:
-                case Notoneloopgreedy:
+                case Notoneloopatomic:
                 case Onelazy:
                 case Notonelazy:
                 case Setrep:
                 case Setloop:
-                case Setloopgreedy:
+                case Setloopatomic:
                 case Setlazy:
                     sb.Append(", Rep = ");
                     if (Codes[offset + 2] == int.MaxValue)
index e1cb3f5..c969618 100644 (file)
@@ -1545,12 +1545,12 @@ namespace System.Text.RegularExpressions
                         case RegexNode.Eol:
                         case RegexNode.End:
                         case RegexNode.EndZ:
-                        // {Set/One}loopgreedy are optimized nodes that represent non-backtracking variable-length loops.
+                        // {Set/One/Notone}loopatomic are optimized nodes that represent non-backtracking variable-length loops.
                         // These consume their {Set/One} inputs as long as they match, and don't give up anything they
                         // matched, which means we can support them without backtracking.
-                        case RegexNode.Oneloopgreedy:
-                        case RegexNode.Notoneloopgreedy:
-                        case RegexNode.Setloopgreedy:
+                        case RegexNode.Oneloopatomic:
+                        case RegexNode.Notoneloopatomic:
+                        case RegexNode.Setloopatomic:
                         // "Empty" is easy: nothing is emitted for it.
                         // "Nothing" is also easy: it doesn't match anything.
                         case RegexNode.Empty:
@@ -1564,7 +1564,7 @@ namespace System.Text.RegularExpressions
                         case RegexNode.Oneloop:
                         case RegexNode.Notoneloop:
                         case RegexNode.Setloop:
-                            Debug.Assert(node.Next == null || node.Next.Type != RegexNode.Greedy, "Loop should have been transformed into a greedy type.");
+                            Debug.Assert(node.Next == null || node.Next.Type != RegexNode.Atomic, "Loop should have been transformed into an atomic type.");
                             goto case RegexNode.Onelazy;
                         case RegexNode.Onelazy:
                         case RegexNode.Notonelazy:
@@ -1573,17 +1573,17 @@ namespace System.Text.RegularExpressions
                             break;
 
                         // {Lazy}Loop repeaters are the same, except their child also needs to be supported.
-                        // We also support such loops being greedy.
+                        // We also support such loops being atomic.
                         case RegexNode.Loop:
                         case RegexNode.Lazyloop:
                             supported =
-                                (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Greedy)) &&
+                                (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic)) &&
                                 NodeSupportsNonBacktrackingImplementation(node.Child(0), level + 1);
                             break;
 
-                        // We can handle greedy as long as we can handle making its child greedy, or
+                        // We can handle atomic as long as we can handle making its child atomic, or
                         // its child doesn't have that concept.
-                        case RegexNode.Greedy:
+                        case RegexNode.Atomic:
                         // Lookahead assertions also only require that the child node be supported.
                         // The RightToLeft check earlier is important to differentiate lookbehind,
                         // which is not supported.
@@ -1592,12 +1592,12 @@ namespace System.Text.RegularExpressions
                             supported = NodeSupportsNonBacktrackingImplementation(node.Child(0), level + 1);
                             break;
 
-                        // We can handle alternates as long as they're greedy (a root / global alternate is
-                        // effectively greedy, as nothing will try to backtrack into it as it's the last thing).
+                        // We can handle alternates as long as they're atomic (a root / global alternate is
+                        // effectively atomic, as nothing will try to backtrack into it as it's the last thing).
                         // Its children must all also be supported.
                         case RegexNode.Alternate:
                             if (node.Next != null &&
-                                (node.Next.Type == RegexNode.Greedy || // greedy alternate
+                                (node.Next.Type == RegexNode.Atomic || // atomic alternate
                                 (node.Next.Type == RegexNode.Capture && node.Next.Next is null))) // root alternate
                             {
                                 goto case RegexNode.Concatenate;
@@ -1710,11 +1710,11 @@ namespace System.Text.RegularExpressions
                 }
             }
 
-            // Emits the code for a greedy alternate, one that once a branch successfully matches is non-backtracking into it.
+            // Emits the code for an atomic alternate, one that once a branch successfully matches is non-backtracking into it.
             // This amounts to generating the code for each branch, with failures in a branch resetting state to what it was initially
             // and then jumping to the next branch. We don't need to worry about uncapturing, because capturing is only allowed for the
             // implicit capture that happens for the whole match at the end.
-            void EmitGreedyAlternate(RegexNode node)
+            void EmitAtomicAlternate(RegexNode node)
             {
                 // int startingTextSpanPos = textSpanPos;
                 // int startingRunTextPos = runtextpos;
@@ -1867,30 +1867,30 @@ namespace System.Text.RegularExpressions
                         EmitMultiChar(node);
                         break;
 
-                    case RegexNode.Oneloopgreedy:
-                    case RegexNode.Notoneloopgreedy:
-                    case RegexNode.Setloopgreedy:
+                    case RegexNode.Oneloopatomic:
+                    case RegexNode.Notoneloopatomic:
+                    case RegexNode.Setloopatomic:
                     case RegexNode.Loop:
-                        EmitGreedyLoop(node);
+                        EmitAtomicLoop(node);
                         break;
 
                     case RegexNode.Lazyloop:
-                        // A greedy lazy loop amounts to doing the minimum amount of work possible.
+                        // An atomic lazy loop amounts to doing the minimum amount of work possible.
                         // That means iterating as little as is required, which means a repeater
                         // for the min, and if min is 0, doing nothing.
-                        Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Greedy));
+                        Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic));
                         if (node.M > 0)
                         {
                             EmitRepeater(node, repeatChildNode: true, iterations: node.M);
                         }
                         break;
 
-                    case RegexNode.Greedy:
+                    case RegexNode.Atomic:
                         EmitNode(node.Child(0));
                         break;
 
                     case RegexNode.Alternate:
-                        EmitGreedyAlternate(node);
+                        EmitAtomicAlternate(node);
                         break;
 
                     case RegexNode.Oneloop:
@@ -1955,7 +1955,7 @@ namespace System.Text.RegularExpressions
                     case RegexNode.Set:
                     case RegexNode.Setlazy:
                     case RegexNode.Setloop:
-                    case RegexNode.Setloopgreedy:
+                    case RegexNode.Setloopatomic:
                         LocalBuilder setScratchLocal = RentInt32Local();
                         EmitCallCharInClass(node.Str!, IsCaseInsensitive(node), setScratchLocal);
                         ReturnInt32Local(setScratchLocal);
@@ -1965,14 +1965,14 @@ namespace System.Text.RegularExpressions
                     case RegexNode.One:
                     case RegexNode.Onelazy:
                     case RegexNode.Oneloop:
-                    case RegexNode.Oneloopgreedy:
+                    case RegexNode.Oneloopatomic:
                         if (IsCaseInsensitive(node)) CallToLower();
                         Ldc(node.Ch);
                         BneFar(doneLabel);
                         break;
 
                     default:
-                        Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop || node.Type == RegexNode.Notoneloopgreedy);
+                        Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop || node.Type == RegexNode.Notoneloopatomic);
                         if (IsCaseInsensitive(node)) CallToLower();
                         Ldc(node.Ch);
                         BeqFar(doneLabel);
@@ -2193,14 +2193,14 @@ namespace System.Text.RegularExpressions
                 ReturnInt32Local(iterationLocal);
             }
 
-            // Emits the code to handle a non-backtracking, variable-length loop (Oneloopgreedy or Setloopgreedy).
-            void EmitGreedyLoop(RegexNode node)
+            // Emits the code to handle a non-backtracking, variable-length loop (Oneloopatomic or Setloopatomic).
+            void EmitAtomicLoop(RegexNode node)
             {
                 Debug.Assert(
-                    node.Type == RegexNode.Oneloopgreedy ||
-                    node.Type == RegexNode.Notoneloopgreedy ||
-                    node.Type == RegexNode.Setloopgreedy ||
-                    (node.Type == RegexNode.Loop && (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Greedy))));
+                    node.Type == RegexNode.Oneloopatomic ||
+                    node.Type == RegexNode.Notoneloopatomic ||
+                    node.Type == RegexNode.Setloopatomic ||
+                    (node.Type == RegexNode.Loop && (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic))));
                 Debug.Assert(node.M < int.MaxValue);
 
                 // First generate the code to handle the required number of iterations.
@@ -2217,9 +2217,9 @@ namespace System.Text.RegularExpressions
                     Label originalDoneLabel = doneLabel;
                     doneLabel = DefineLabel();
 
-                    if (node.Type == RegexNode.Notoneloopgreedy && node.N == int.MaxValue && !IsCaseInsensitive(node))
+                    if (node.Type == RegexNode.Notoneloopatomic && node.N == int.MaxValue && !IsCaseInsensitive(node))
                     {
-                        // For Notoneloopgreedy, we're looking for a specific character, as everything until we find
+                        // For Notoneloopatomic, we're looking for a specific character, as everything until we find
                         // it is consumed by the loop.  If we're unbounded, such as with ".*" and if we're case-sensitive,
                         // we can use the vectorized IndexOf to do the search, rather than open-coding it. (In the future,
                         // we could consider using IndexOf with StringComparison for case insensitivity.)
@@ -2348,17 +2348,17 @@ namespace System.Text.RegularExpressions
                             LdindU2();
                             switch (node.Type)
                             {
-                                case RegexNode.Oneloopgreedy:
+                                case RegexNode.Oneloopatomic:
                                     if (IsCaseInsensitive(node)) CallToLower();
                                     Ldc(node.Ch);
                                     BneFar(doneLabel);
                                     break;
-                                case RegexNode.Notoneloopgreedy:
+                                case RegexNode.Notoneloopatomic:
                                     if (IsCaseInsensitive(node)) CallToLower();
                                     Ldc(node.Ch);
                                     BeqFar(doneLabel);
                                     break;
-                                case RegexNode.Setloopgreedy:
+                                case RegexNode.Setloopatomic:
                                     LocalBuilder setScratchLocal = RentInt32Local();
                                     EmitCallCharInClass(node.Str!, IsCaseInsensitive(node), setScratchLocal);
                                     ReturnInt32Local(setScratchLocal);
@@ -3705,18 +3705,18 @@ namespace System.Text.RegularExpressions
                 case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl:
                 case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl:
                 case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl:
-                case RegexCode.Oneloopgreedy:
-                case RegexCode.Notoneloopgreedy:
-                case RegexCode.Setloopgreedy:
-                case RegexCode.Oneloopgreedy | RegexCode.Rtl:
-                case RegexCode.Notoneloopgreedy | RegexCode.Rtl:
-                case RegexCode.Setloopgreedy | RegexCode.Rtl:
-                case RegexCode.Oneloopgreedy | RegexCode.Ci:
-                case RegexCode.Notoneloopgreedy | RegexCode.Ci:
-                case RegexCode.Setloopgreedy | RegexCode.Ci:
-                case RegexCode.Oneloopgreedy | RegexCode.Ci | RegexCode.Rtl:
-                case RegexCode.Notoneloopgreedy | RegexCode.Ci | RegexCode.Rtl:
-                case RegexCode.Setloopgreedy | RegexCode.Ci | RegexCode.Rtl:
+                case RegexCode.Oneloopatomic:
+                case RegexCode.Notoneloopatomic:
+                case RegexCode.Setloopatomic:
+                case RegexCode.Oneloopatomic | RegexCode.Rtl:
+                case RegexCode.Notoneloopatomic | RegexCode.Rtl:
+                case RegexCode.Setloopatomic | RegexCode.Rtl:
+                case RegexCode.Oneloopatomic | RegexCode.Ci:
+                case RegexCode.Notoneloopatomic | RegexCode.Ci:
+                case RegexCode.Setloopatomic | RegexCode.Ci:
+                case RegexCode.Oneloopatomic | RegexCode.Ci | RegexCode.Rtl:
+                case RegexCode.Notoneloopatomic | RegexCode.Ci | RegexCode.Rtl:
+                case RegexCode.Setloopatomic | RegexCode.Ci | RegexCode.Rtl:
                     //: int c = Operand(1);
                     //: if (c > Rightchars())
                     //:     c = Rightchars();
@@ -3779,7 +3779,7 @@ namespace System.Text.RegularExpressions
                         Dup();
                         Stloc(cLocal);
                         Ldc(0);
-                        if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopgreedy)
+                        if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
                         {
                             BleFar(l2);
                         }
@@ -3797,7 +3797,7 @@ namespace System.Text.RegularExpressions
                             Rightcharnext();
                         }
 
-                        if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopgreedy)
+                        if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
                         {
                             EmitTimeoutCheck();
                             EmitCallCharInClass(_strings![Operand(0)], IsCaseInsensitive(), charInClassLocal);
@@ -3811,13 +3811,13 @@ namespace System.Text.RegularExpressions
                             }
 
                             Ldc(Operand(0));
-                            if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopgreedy)
+                            if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic)
                             {
                                 Beq(l1);
                             }
                             else
                             {
-                                Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopgreedy);
+                                Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic);
                                 Bne(l1);
                             }
                         }
@@ -3829,7 +3829,7 @@ namespace System.Text.RegularExpressions
 
                         MarkLabel(l2);
 
-                        if (Code() != RegexCode.Oneloopgreedy && Code() != RegexCode.Notoneloopgreedy && Code() != RegexCode.Setloopgreedy)
+                        if (Code() != RegexCode.Oneloopatomic && Code() != RegexCode.Notoneloopatomic && Code() != RegexCode.Setloopatomic)
                         {
                             Ldloc(lenLocal);
                             Ldloc(cLocal);
index 3f595b1..a1d9e0f 100644 (file)
@@ -94,14 +94,14 @@ namespace System.Text.RegularExpressions
                         }
                         break;
 
-                    case RegexNode.Greedy:
+                    case RegexNode.Atomic:
                     case RegexNode.Capture:
                         curNode = curNode.Child(0);
                         concatNode = null;
                         continue;
 
                     case RegexNode.Oneloop:
-                    case RegexNode.Oneloopgreedy:
+                    case RegexNode.Oneloopatomic:
                     case RegexNode.Onelazy:
 
                         // In release, cutoff at a length to which we can still reasonably construct a string
@@ -176,7 +176,7 @@ namespace System.Text.RegularExpressions
                         }
                         break;
 
-                    case RegexNode.Greedy:
+                    case RegexNode.Atomic:
                     case RegexNode.Capture:
                         curNode = curNode.Child(0);
                         concatNode = null;
@@ -420,8 +420,8 @@ namespace System.Text.RegularExpressions
                 case RegexNode.Group | AfterChild:
                 case RegexNode.Capture | BeforeChild:
                 case RegexNode.Capture | AfterChild:
-                case RegexNode.Greedy | BeforeChild:
-                case RegexNode.Greedy | AfterChild:
+                case RegexNode.Atomic | BeforeChild:
+                case RegexNode.Atomic | AfterChild:
                     break;
 
                 case RegexNode.Require | BeforeChild:
@@ -440,13 +440,13 @@ namespace System.Text.RegularExpressions
                     break;
 
                 case RegexNode.Oneloop:
-                case RegexNode.Oneloopgreedy:
+                case RegexNode.Oneloopatomic:
                 case RegexNode.Onelazy:
                     PushFC(new RegexFC(node.Ch, false, node.M == 0, ci));
                     break;
 
                 case RegexNode.Notoneloop:
-                case RegexNode.Notoneloopgreedy:
+                case RegexNode.Notoneloopatomic:
                 case RegexNode.Notonelazy:
                     PushFC(new RegexFC(node.Ch, true, node.M == 0, ci));
                     break;
@@ -465,7 +465,7 @@ namespace System.Text.RegularExpressions
                     break;
 
                 case RegexNode.Setloop:
-                case RegexNode.Setloopgreedy:
+                case RegexNode.Setloopatomic:
                 case RegexNode.Setlazy:
                     PushFC(new RegexFC(node.Str!, node.M == 0, ci));
                     break;
index 895a0a1..27f9a38 100644 (file)
@@ -1133,7 +1133,7 @@ namespace System.Text.RegularExpressions
                         }
 
                     case RegexCode.Oneloop:
-                    case RegexCode.Oneloopgreedy:
+                    case RegexCode.Oneloopatomic:
                         {
                             int c = Operand(1);
 
@@ -1163,7 +1163,7 @@ namespace System.Text.RegularExpressions
                         }
 
                     case RegexCode.Notoneloop:
-                    case RegexCode.Notoneloopgreedy:
+                    case RegexCode.Notoneloopatomic:
                         {
                             int c = Operand(1);
 
@@ -1193,7 +1193,7 @@ namespace System.Text.RegularExpressions
                         }
 
                     case RegexCode.Setloop:
-                    case RegexCode.Setloopgreedy:
+                    case RegexCode.Setloopatomic:
                         {
                             int c = Operand(1);
 
index f3b665c..36476ae 100644 (file)
@@ -77,9 +77,9 @@ namespace System.Text.RegularExpressions
         public const int EndZ = RegexCode.EndZ;                       //          \Z
         public const int End = RegexCode.End;                         //          \z
 
-        public const int Oneloopgreedy = RegexCode.Oneloopgreedy;        // c,n      (?> a*)
-        public const int Notoneloopgreedy = RegexCode.Notoneloopgreedy;  // c,n      (?> .*)
-        public const int Setloopgreedy = RegexCode.Setloopgreedy;        // set,n    (?> \d*)
+        public const int Oneloopatomic = RegexCode.Oneloopatomic;        // c,n      (?> a*)
+        public const int Notoneloopatomic = RegexCode.Notoneloopatomic;  // c,n      (?> .*)
+        public const int Setloopatomic = RegexCode.Setloopatomic;        // set,n    (?> \d*)
 
         // Interior nodes do not correspond to primitive operations, but
         // control structures compositing other operations
@@ -99,7 +99,7 @@ namespace System.Text.RegularExpressions
         public const int Group = 29;                                  //          (?:)       - noncapturing group
         public const int Require = 30;                                //          (?=) (?<=) - lookahead and lookbehind assertions
         public const int Prevent = 31;                                //          (?!) (?<!) - negative lookahead and lookbehind assertions
-        public const int Greedy = 32;                                 //          (?>)       - greedy subexpression
+        public const int Atomic = 32;                                 //          (?>)       - atomic subexpression
         public const int Testref = 33;                                //          (?(n) | )  - alternation, reference
         public const int Testgroup = 34;                              //          (?(...) | )- alternation, expression
 
@@ -189,15 +189,15 @@ namespace System.Text.RegularExpressions
                     switch (node.Type)
                     {
                         case Oneloop:
-                            node.Type = Oneloopgreedy;
+                            node.Type = Oneloopatomic;
                             break;
 
                         case Notoneloop:
-                            node.Type = Notoneloopgreedy;
+                            node.Type = Notoneloopatomic;
                             break;
 
                         case Setloop:
-                            node.Type = Setloopgreedy;
+                            node.Type = Setloopatomic;
                             break;
 
                         case Capture:
@@ -212,14 +212,14 @@ namespace System.Text.RegularExpressions
                                 case Alternate:
                                 case Loop:
                                 case Lazyloop:
-                                    var greedy = new RegexNode(Greedy, Options);
-                                    greedy.AddChild(existingChild);
-                                    node.ReplaceChild(node.ChildCount() - 1, greedy);
+                                    var atomic = new RegexNode(Atomic, Options);
+                                    atomic.AddChild(existingChild);
+                                    node.ReplaceChild(node.ChildCount() - 1, atomic);
                                     break;
                             }
                             continue;
 
-                        case Greedy:
+                        case Atomic:
                             node = node.Child(0);
                             continue;
                     }
@@ -254,8 +254,8 @@ namespace System.Text.RegularExpressions
                     n = ReduceLoops();
                     break;
 
-                case Greedy:
-                    n = ReduceGreedy();
+                case Atomic:
+                    n = ReduceAtomic();
                     break;
 
                 case Group:
@@ -306,33 +306,32 @@ namespace System.Text.RegularExpressions
         }
 
         /// <summary>
-        /// Simple optimization. If a greedy subexpression contains only a set loop
-        /// or a one loop, change them to be a greedy set loop or greedy one loop,
-        /// and remove the greedy node.
+        /// Simple optimization. If an atomic subexpression contains only a one/notone/set loop,
+        /// change it to be an atomic one/notone/set loop and remove the atomic node.
         /// </summary>
-        private RegexNode ReduceGreedy()
+        private RegexNode ReduceAtomic()
         {
-            Debug.Assert(Type == Greedy);
+            Debug.Assert(Type == Atomic);
             Debug.Assert(ChildCount() == 1);
 
             RegexNode child = Child(0);
             switch (child.Type)
             {
                 case Oneloop:
-                    child.Type = Oneloopgreedy;
+                    child.Type = Oneloopatomic;
                     return child;
 
                 case Notoneloop:
-                    child.Type = Notoneloopgreedy;
+                    child.Type = Notoneloopatomic;
                     return child;
 
                 case Setloop:
-                    child.Type = Setloopgreedy;
+                    child.Type = Setloopatomic;
                     return child;
 
-                case Oneloopgreedy:
-                case Notoneloopgreedy:
-                case Setloopgreedy:
+                case Oneloopatomic:
+                case Notoneloopatomic:
+                case Setloopatomic:
                     return child;
             }
 
@@ -367,11 +366,11 @@ namespace System.Text.RegularExpressions
                         switch (child.Type)
                         {
                             case Oneloop:
-                            case Oneloopgreedy:
+                            case Oneloopatomic:
                             case Notoneloop:
-                            case Notoneloopgreedy:
+                            case Notoneloopatomic:
                             case Setloop:
-                            case Setloopgreedy:
+                            case Setloopatomic:
                                 valid = true;
                                 break;
                         }
@@ -699,10 +698,10 @@ namespace System.Text.RegularExpressions
                 children.RemoveRange(j, i - j);
             }
 
-            // Now try to convert as many loops as possible to be greedy to avoid unnecessary backtracking.
+            // Now try to convert as many loops as possible to be atomic to avoid unnecessary backtracking.
             if ((Options & RegexOptions.RightToLeft) == 0)
             {
-                ReduceConcatenateWithAutoGreedy();
+                ReduceConcatenateWithAutoAtomic();
             }
 
             // If the concatenation is now empty, return an empty node, or if it's got a single child, return that child.
@@ -712,11 +711,11 @@ namespace System.Text.RegularExpressions
 
         /// <summary>
         /// Finds oneloop and setloop nodes in the concatenation that can be automatically upgraded
-        /// to oneloopgreedy and setloopgreedy nodes.  Such changes avoid potential useless backtracking.
+        /// to oneloopatomic and setloopatomic nodes (notoneloop nodes are likewise upgraded to notoneloopatomic).  Such changes avoid potential useless backtracking.
         /// This looks for cases like A*B, where A and B are known to not overlap: in such cases,
         /// we can effectively convert this to (?>A*)B.
         /// </summary>
-        private void ReduceConcatenateWithAutoGreedy()
+        private void ReduceConcatenateWithAutoAtomic()
         {
             Debug.Assert(Type == Concatenate);
             Debug.Assert((Options & RegexOptions.RightToLeft) == 0);
@@ -743,7 +742,7 @@ namespace System.Text.RegularExpressions
                     switch (subsequent.Type)
                     {
                         case Capture:
-                        case Greedy:
+                        case Atomic:
                         case Require:
                         case Concatenate:
                         case Loop when subsequent.M > 0:
@@ -764,7 +763,7 @@ namespace System.Text.RegularExpressions
                 }
 
                 // If this node is a one/notone/setloop, see if it overlaps with its successor in the concatenation.
-                // If it doesn't, then we can upgrade it to being a one/notone/setloopgreedy.
+                // If it doesn't, then we can upgrade it to being a one/notone/setloopatomic.
                 // Doing so avoids unnecessary backtracking.
                 switch (node.Type)
                 {
@@ -774,16 +773,16 @@ namespace System.Text.RegularExpressions
                             case One when node.Ch != subsequent.Ch:
                             case Onelazy when subsequent.M > 0 && node.Ch != subsequent.Ch:
                             case Oneloop when subsequent.M > 0 && node.Ch != subsequent.Ch:
-                            case Oneloopgreedy when subsequent.M > 0 && node.Ch != subsequent.Ch:
+                            case Oneloopatomic when subsequent.M > 0 && node.Ch != subsequent.Ch:
                             case Notone when node.Ch == subsequent.Ch:
                             case Notonelazy when subsequent.M > 0 && node.Ch == subsequent.Ch:
                             case Notoneloop when subsequent.M > 0 && node.Ch == subsequent.Ch:
-                            case Notoneloopgreedy when subsequent.M > 0 && node.Ch == subsequent.Ch:
+                            case Notoneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch:
                             case Multi when node.Ch != subsequent.Str![0]:
                             case Set when !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
                             case Setlazy when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
                             case Setloop when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
-                            case Setloopgreedy when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
+                            case Setloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
                             case End:
                             case EndZ when node.Ch != '\n':
                             case Eol when node.Ch != '\n':
@@ -791,7 +790,7 @@ namespace System.Text.RegularExpressions
                             case Nonboundary when !RegexCharClass.IsWordChar(node.Ch):
                             case ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch):
                             case NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch):
-                                node.Type = Oneloopgreedy;
+                                node.Type = Oneloopatomic;
                                 break;
                         }
                         break;
@@ -802,10 +801,10 @@ namespace System.Text.RegularExpressions
                             case One when node.Ch == subsequent.Ch:
                             case Onelazy when subsequent.M > 0 && node.Ch == subsequent.Ch:
                             case Oneloop when subsequent.M > 0 && node.Ch == subsequent.Ch:
-                            case Oneloopgreedy when subsequent.M > 0 && node.Ch == subsequent.Ch:
+                            case Oneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch:
                             case Multi when node.Ch == subsequent.Str![0]:
                             case End:
-                                node.Type = Notoneloopgreedy;
+                                node.Type = Notoneloopatomic;
                                 break;
                         }
                         break;
@@ -816,16 +815,16 @@ namespace System.Text.RegularExpressions
                             case One when !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                             case Onelazy when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                             case Oneloop when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
-                            case Oneloopgreedy when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
+                            case Oneloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                             case Notone when RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                             case Notonelazy when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                             case Notoneloop when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
-                            case Notoneloopgreedy when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
+                            case Notoneloopatomic when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                             case Multi when !RegexCharClass.CharInClass(subsequent.Str![0], node.Str!):
                             case Set when !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
                             case Setlazy when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
                             case Setloop when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
-                            case Setloopgreedy when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
+                            case Setloopatomic when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
                             case End:
                             case EndZ when !RegexCharClass.CharInClass('\n', node.Str!):
                             case Eol when !RegexCharClass.CharInClass('\n', node.Str!):
@@ -833,7 +832,7 @@ namespace System.Text.RegularExpressions
                             case Nonboundary when node.Str == RegexCharClass.NotWordClass || node.Str == RegexCharClass.NotDigitClass:
                             case ECMABoundary when node.Str == RegexCharClass.ECMAWordClass || node.Str == RegexCharClass.ECMADigitClass:
                             case NonECMABoundary when node.Str == RegexCharClass.NotECMAWordClass || node.Str == RegexCharClass.NotDigitClass:
-                                node.Type = Setloopgreedy;
+                                node.Type = Setloopatomic;
                                 break;
                         }
                         break;
@@ -937,11 +936,11 @@ namespace System.Text.RegularExpressions
             "Nothing", "Empty",
             "Alternate", "Concatenate",
             "Loop", "Lazyloop",
-            "Capture", "Group", "Require", "Prevent", "Greedy",
+            "Capture", "Group", "Require", "Prevent", "Atomic",
             "Testref", "Testgroup",
             "", "", "", "", "", "",
             "ECMABoundary", "NonECMABoundary",
-            "Oneloopgreedy", "Notoneloopgreedy", "Setloopgreedy",
+            "Oneloopatomic", "Notoneloopatomic", "Setloopatomic",
         };
 
         public string Description()
@@ -968,9 +967,9 @@ namespace System.Text.RegularExpressions
             switch (Type)
             {
                 case Oneloop:
-                case Oneloopgreedy:
+                case Oneloopatomic:
                 case Notoneloop:
-                case Notoneloopgreedy:
+                case Notoneloopatomic:
                 case Onelazy:
                 case Notonelazy:
                 case One:
@@ -989,7 +988,7 @@ namespace System.Text.RegularExpressions
                     break;
                 case Set:
                 case Setloop:
-                case Setloopgreedy:
+                case Setloopatomic:
                 case Setlazy:
                     argSb.Append("(Set = " + RegexCharClass.SetDescription(Str!) + ")");
                     break;
@@ -998,13 +997,13 @@ namespace System.Text.RegularExpressions
             switch (Type)
             {
                 case Oneloop:
-                case Oneloopgreedy:
+                case Oneloopatomic:
                 case Notoneloop:
-                case Notoneloopgreedy:
+                case Notoneloopatomic:
                 case Onelazy:
                 case Notonelazy:
                 case Setloop:
-                case Setloopgreedy:
+                case Setloopatomic:
                 case Setlazy:
                 case Loop:
                 case Lazyloop:
index 171cb65..393a468 100644 (file)
@@ -802,8 +802,8 @@ namespace System.Text.RegularExpressions
                         break;
 
                     case '>':
-                        // greedy subexpression
-                        nodeType = RegexNode.Greedy;
+                        // atomic subexpression
+                        nodeType = RegexNode.Atomic;
                         break;
 
                     case '\'':
index b8f30b5..9dd8d40 100644 (file)
@@ -428,11 +428,11 @@ namespace System.Text.RegularExpressions
                     Emit(RegexCode.Forejump);
                     break;
 
-                case RegexNode.Greedy | BeforeChild:
+                case RegexNode.Atomic | BeforeChild:
                     Emit(RegexCode.Setjump);
                     break;
 
-                case RegexNode.Greedy | AfterChild:
+                case RegexNode.Atomic | AfterChild:
                     Emit(RegexCode.Forejump);
                     break;
 
@@ -442,14 +442,14 @@ namespace System.Text.RegularExpressions
                     break;
 
                 case RegexNode.Notoneloop:
-                case RegexNode.Notoneloopgreedy:
+                case RegexNode.Notoneloopatomic:
                 case RegexNode.Notonelazy:
                 case RegexNode.Oneloop:
-                case RegexNode.Oneloopgreedy:
+                case RegexNode.Oneloopatomic:
                 case RegexNode.Onelazy:
                     if (node.M > 0)
                     {
-                        Emit(((node.Type == RegexNode.Oneloop || node.Type == RegexNode.Oneloopgreedy || node.Type == RegexNode.Onelazy) ?
+                        Emit(((node.Type == RegexNode.Oneloop || node.Type == RegexNode.Oneloopatomic || node.Type == RegexNode.Onelazy) ?
                               RegexCode.Onerep : RegexCode.Notonerep) | bits, node.Ch, node.M);
                     }
                     if (node.N > node.M)
@@ -459,7 +459,7 @@ namespace System.Text.RegularExpressions
                     break;
 
                 case RegexNode.Setloop:
-                case RegexNode.Setloopgreedy:
+                case RegexNode.Setloopatomic:
                 case RegexNode.Setlazy:
                     {
                         int stringCode = StringCode(node.Str!);
index 13a7c26..9a4ca22 100644 (file)
@@ -642,8 +642,8 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { null, @"(cat){5,dog}?", "cat{5,dog}?", RegexOptions.None, new string[] { "cat{5,dog}", "cat" } };
             yield return new object[] { null, @"(cat){cat,dog}?", "cat{cat,dog}?", RegexOptions.None, new string[] { "cat{cat,dog}", "cat" } };
 
-            // Atomic ("greedy") subexpressions
-            // Implicitly upgrading oneloop to be greedy
+            // Atomic subexpressions
+            // Implicitly upgrading oneloop to be atomic
             yield return new object[] { null, @"a*", "aaa", RegexOptions.None, new string[] { "aaa" } };
             yield return new object[] { null, @"a*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
             yield return new object[] { null, @"a*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
@@ -665,14 +665,14 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { null, @"a*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } };
             yield return new object[] { null, @"@*\B", "@@@", RegexOptions.None, new string[] { "@@@" } };
             yield return new object[] { null, @"@*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } };
-            // Implicitly upgrading notoneloop to be greedy
+            // Implicitly upgrading notoneloop to be atomic
             yield return new object[] { null, @"[^b]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
             yield return new object[] { null, @"[^b]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
             yield return new object[] { null, @"[^b]*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
             yield return new object[] { null, @"[^b]*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
             yield return new object[] { null, @"[^b]*bac", "aaabac", RegexOptions.None, new string[] { "aaabac" } };
             yield return new object[] { null, @"[^b]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
-            // Implicitly upgrading setloop to be greedy
+            // Implicitly upgrading setloop to be atomic
             yield return new object[] { null, @"[ac]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
             yield return new object[] { null, @"[ac]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
             yield return new object[] { null, @"[ac]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
@@ -694,7 +694,7 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { null, @"[ac]*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } };
             yield return new object[] { null, @"[@']*\B", "@@@", RegexOptions.None, new string[] { "@@@" } };
             yield return new object[] { null, @"[@']*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } };
-            // Implicitly upgrading nested loops to be greedy
+            // Implicitly upgrading nested loops to be atomic
             yield return new object[] { null, @"(?:a){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } };
             yield return new object[] { null, @"(?:a){3}?", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } };
             yield return new object[] { null, @"(?:a{2}){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaaaaa" } };