Rename "Greedy" subexpressions to "Atomic"

author Stephen Toub <stoub@microsoft.com>

Mon, 6 Jan 2020 21:43:05 +0000 (16:43 -0500)

committer Stephen Toub <stoub@microsoft.com>

Thu, 9 Jan 2020 03:50:10 +0000 (22:50 -0500)
author Stephen Toub <stoub@microsoft.com>
Mon, 6 Jan 2020 21:43:05 +0000 (16:43 -0500)
committer Stephen Toub <stoub@microsoft.com>
Thu, 9 Jan 2020 03:50:10 +0000 (22:50 -0500)
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs

index bc4e4cb..20eefbc 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs
@@ -84,9 +84,9 @@ namespace System.Text.RegularExpressions
          // Manufactured primitive operations, derived from the tree that comes from the parser.
          // These exist to reduce backtracking (both actually performing it and spitting code for it).
  
-        public const int Oneloopgreedy = 43;      // lef,back char,min,max    (?> a {,n} )
-        public const int Notoneloopgreedy = 44;   // lef,back set,min,max     (?> . {,n} )
-        public const int Setloopgreedy = 45;      // lef,back set,min,max     (?> [\d]{,n} )
+        public const int Oneloopatomic = 43;      // lef,back char,min,max    (?> a {,n} )
+        public const int Notoneloopatomic = 44;   // lef,back set,min,max     (?> . {,n} )
+        public const int Setloopatomic = 45;      // lef,back set,min,max     (?> [\d]{,n} )
  
          // Modifiers for alternate modes
          public const int Mask = 63;   // Mask to get unmodified ordinary operator
@@ -206,15 +206,15 @@ namespace System.Text.RegularExpressions
                  case Onerep:
                  case Notonerep:
                  case Oneloop:
-                case Oneloopgreedy:
+                case Oneloopatomic:
                  case Notoneloop:
-                case Notoneloopgreedy:
+                case Notoneloopatomic:
                  case Onelazy:
                  case Notonelazy:
                  case Setlazy:
                  case Setrep:
                  case Setloop:
-                case Setloopgreedy:
+                case Setloopatomic:
                      return 3;
  
                  default:
@@ -238,7 +238,7 @@ namespace System.Text.RegularExpressions
              "Setjump", "Backjump", "Forejump", "Testref", "Goto",
              "Prune", "Stop",
              "ECMABoundary", "NonECMABoundary",
-            "Oneloopgreedy", "Notoneloopgreedy", "Setloopgreedy"
+            "Oneloopatomic", "Notoneloopatomic", "Setloopatomic"
          };
  
          private static string OperatorDescription(int Opcode)
@@ -275,9 +275,9 @@ namespace System.Text.RegularExpressions
                  case Onerep:
                  case Notonerep:
                  case Oneloop:
-                case Oneloopgreedy:
+                case Oneloopatomic:
                  case Notoneloop:
-                case Notoneloopgreedy:
+                case Notoneloopatomic:
                  case Onelazy:
                  case Notonelazy:
                      sb.Append("Ch = ");
@@ -287,7 +287,7 @@ namespace System.Text.RegularExpressions
                  case Set:
                  case Setrep:
                  case Setloop:
-                case Setloopgreedy:
+                case Setloopatomic:
                  case Setlazy:
                      sb.Append("Set = ");
                      sb.Append(RegexCharClass.SetDescription(Strings[Codes[offset + 1]]));
@@ -336,14 +336,14 @@ namespace System.Text.RegularExpressions
                  case Onerep:
                  case Notonerep:
                  case Oneloop:
-                case Oneloopgreedy:
+                case Oneloopatomic:
                  case Notoneloop:
-                case Notoneloopgreedy:
+                case Notoneloopatomic:
                  case Onelazy:
                  case Notonelazy:
                  case Setrep:
                  case Setloop:
-                case Setloopgreedy:
+                case Setloopatomic:
                  case Setlazy:
                      sb.Append(", Rep = ");
                      if (Codes[offset + 2] == int.MaxValue)
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs

index e1cb3f5..c969618 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
@@ -1545,12 +1545,12 @@ namespace System.Text.RegularExpressions
                          case RegexNode.Eol:
                          case RegexNode.End:
                          case RegexNode.EndZ:
-                        // {Set/One}loopgreedy are optimized nodes that represent non-backtracking variable-length loops.
+                        // {Set/One/Notone}loopatomic are optimized nodes that represent non-backtracking variable-length loops.
                          // These consume their {Set/One} inputs as long as they match, and don't give up anything they
                          // matched, which means we can support them without backtracking.
-                        case RegexNode.Oneloopgreedy:
-                        case RegexNode.Notoneloopgreedy:
-                        case RegexNode.Setloopgreedy:
+                        case RegexNode.Oneloopatomic:
+                        case RegexNode.Notoneloopatomic:
+                        case RegexNode.Setloopatomic:
                          // "Empty" is easy: nothing is emitted for it.
                          // "Nothing" is also easy: it doesn't match anything.
                          case RegexNode.Empty:
@@ -1564,7 +1564,7 @@ namespace System.Text.RegularExpressions
                          case RegexNode.Oneloop:
                          case RegexNode.Notoneloop:
                          case RegexNode.Setloop:
-                            Debug.Assert(node.Next == null || node.Next.Type != RegexNode.Greedy, "Loop should have been transformed into a greedy type.");
+                            Debug.Assert(node.Next == null || node.Next.Type != RegexNode.Atomic, "Loop should have been transformed into an atomic type.");
                              goto case RegexNode.Onelazy;
                          case RegexNode.Onelazy:
                          case RegexNode.Notonelazy:
@@ -1573,17 +1573,17 @@ namespace System.Text.RegularExpressions
                              break;
  
                          // {Lazy}Loop repeaters are the same, except their child also needs to be supported.
-                        // We also support such loops being greedy.
+                        // We also support such loops being atomic.
                          case RegexNode.Loop:
                          case RegexNode.Lazyloop:
                              supported =
-                                (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Greedy)) &&
+                                (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic)) &&
                                  NodeSupportsNonBacktrackingImplementation(node.Child(0), level + 1);
                              break;
  
-                        // We can handle greedy as long as we can handle making its child greedy, or
+                        // We can handle atomic as long as we can handle making its child atomic, or
                          // its child doesn't have that concept.
-                        case RegexNode.Greedy:
+                        case RegexNode.Atomic:
                          // Lookahead assertions also only require that the child node be supported.
                          // The RightToLeft check earlier is important to differentiate lookbehind,
                          // which is not supported.
@@ -1592,12 +1592,12 @@ namespace System.Text.RegularExpressions
                              supported = NodeSupportsNonBacktrackingImplementation(node.Child(0), level + 1);
                              break;
  
-                        // We can handle alternates as long as they're greedy (a root / global alternate is
-                        // effectively greedy, as nothing will try to backtrack into it as it's the last thing).
+                        // We can handle alternates as long as they're atomic (a root / global alternate is
+                        // effectively atomic, as nothing will try to backtrack into it as it's the last thing).
                          // Its children must all also be supported.
                          case RegexNode.Alternate:
                              if (node.Next != null &&
-                                (node.Next.Type == RegexNode.Greedy || // greedy alternate
+                                (node.Next.Type == RegexNode.Atomic || // atomic alternate
                                  (node.Next.Type == RegexNode.Capture && node.Next.Next is null))) // root alternate
                              {
                                  goto case RegexNode.Concatenate;
@@ -1710,11 +1710,11 @@ namespace System.Text.RegularExpressions
                  }
              }
  
-            // Emits the code for a greedy alternate, one that once a branch successfully matches is non-backtracking into it.
+            // Emits the code for an atomic alternate: once one of its branches successfully matches, nothing backtracks into the alternation.
              // This amounts to generating the code for each branch, with failures in a branch resetting state to what it was initially
              // and then jumping to the next branch. We don't need to worry about uncapturing, because capturing is only allowed for the
              // implicit capture that happens for the whole match at the end.
-            void EmitGreedyAlternate(RegexNode node)
+            void EmitAtomicAlternate(RegexNode node)
              {
                  // int startingTextSpanPos = textSpanPos;
                  // int startingRunTextPos = runtextpos;
@@ -1867,30 +1867,30 @@ namespace System.Text.RegularExpressions
                          EmitMultiChar(node);
                          break;
  
-                    case RegexNode.Oneloopgreedy:
-                    case RegexNode.Notoneloopgreedy:
-                    case RegexNode.Setloopgreedy:
+                    case RegexNode.Oneloopatomic:
+                    case RegexNode.Notoneloopatomic:
+                    case RegexNode.Setloopatomic:
                      case RegexNode.Loop:
-                        EmitGreedyLoop(node);
+                        EmitAtomicLoop(node);
                          break;
  
                      case RegexNode.Lazyloop:
-                        // A greedy lazy loop amounts to doing the minimum amount of work possible.
+                        // An atomic lazy loop amounts to doing the minimum amount of work possible.
                          // That means iterating as little as is required, which means a repeater
                          // for the min, and if min is 0, doing nothing.
-                        Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Greedy));
+                        Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic));
                          if (node.M > 0)
                          {
                              EmitRepeater(node, repeatChildNode: true, iterations: node.M);
                          }
                          break;
  
-                    case RegexNode.Greedy:
+                    case RegexNode.Atomic:
                          EmitNode(node.Child(0));
                          break;
  
                      case RegexNode.Alternate:
-                        EmitGreedyAlternate(node);
+                        EmitAtomicAlternate(node);
                          break;
  
                      case RegexNode.Oneloop:
@@ -1955,7 +1955,7 @@ namespace System.Text.RegularExpressions
                      case RegexNode.Set:
                      case RegexNode.Setlazy:
                      case RegexNode.Setloop:
-                    case RegexNode.Setloopgreedy:
+                    case RegexNode.Setloopatomic:
                          LocalBuilder setScratchLocal = RentInt32Local();
                          EmitCallCharInClass(node.Str!, IsCaseInsensitive(node), setScratchLocal);
                          ReturnInt32Local(setScratchLocal);
@@ -1965,14 +1965,14 @@ namespace System.Text.RegularExpressions
                      case RegexNode.One:
                      case RegexNode.Onelazy:
                      case RegexNode.Oneloop:
-                    case RegexNode.Oneloopgreedy:
+                    case RegexNode.Oneloopatomic:
                          if (IsCaseInsensitive(node)) CallToLower();
                          Ldc(node.Ch);
                          BneFar(doneLabel);
                          break;
  
                      default:
-                        Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop || node.Type == RegexNode.Notoneloopgreedy);
+                        Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop || node.Type == RegexNode.Notoneloopatomic);
                          if (IsCaseInsensitive(node)) CallToLower();
                          Ldc(node.Ch);
                          BeqFar(doneLabel);
@@ -2193,14 +2193,14 @@ namespace System.Text.RegularExpressions
                  ReturnInt32Local(iterationLocal);
              }
  
-            // Emits the code to handle a non-backtracking, variable-length loop (Oneloopgreedy or Setloopgreedy).
-            void EmitGreedyLoop(RegexNode node)
+            // Emits the code to handle a non-backtracking, variable-length loop (Oneloopatomic, Notoneloopatomic, Setloopatomic, or a Loop that is fixed-count or atomic).
+            void EmitAtomicLoop(RegexNode node)
              {
                  Debug.Assert(
-                    node.Type == RegexNode.Oneloopgreedy ||
-                    node.Type == RegexNode.Notoneloopgreedy ||
-                    node.Type == RegexNode.Setloopgreedy ||
-                    (node.Type == RegexNode.Loop && (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Greedy))));
+                    node.Type == RegexNode.Oneloopatomic ||
+                    node.Type == RegexNode.Notoneloopatomic ||
+                    node.Type == RegexNode.Setloopatomic ||
+                    (node.Type == RegexNode.Loop && (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic))));
                  Debug.Assert(node.M < int.MaxValue);
  
                  // First generate the code to handle the required number of iterations.
@@ -2217,9 +2217,9 @@ namespace System.Text.RegularExpressions
                      Label originalDoneLabel = doneLabel;
                      doneLabel = DefineLabel();
  
-                    if (node.Type == RegexNode.Notoneloopgreedy && node.N == int.MaxValue && !IsCaseInsensitive(node))
+                    if (node.Type == RegexNode.Notoneloopatomic && node.N == int.MaxValue && !IsCaseInsensitive(node))
                      {
-                        // For Notoneloopgreedy, we're looking for a specific character, as everything until we find
+                        // For Notoneloopatomic, we're looking for a specific character, as everything until we find
                          // it is consumed by the loop.  If we're unbounded, such as with ".*" and if we're case-sensitive,
                          // we can use the vectorized IndexOf to do the search, rather than open-coding it. (In the future,
                          // we could consider using IndexOf with StringComparison for case insensitivity.)
@@ -2348,17 +2348,17 @@ namespace System.Text.RegularExpressions
                              LdindU2();
                              switch (node.Type)
                              {
-                                case RegexNode.Oneloopgreedy:
+                                case RegexNode.Oneloopatomic:
                                      if (IsCaseInsensitive(node)) CallToLower();
                                      Ldc(node.Ch);
                                      BneFar(doneLabel);
                                      break;
-                                case RegexNode.Notoneloopgreedy:
+                                case RegexNode.Notoneloopatomic:
                                      if (IsCaseInsensitive(node)) CallToLower();
                                      Ldc(node.Ch);
                                      BeqFar(doneLabel);
                                      break;
-                                case RegexNode.Setloopgreedy:
+                                case RegexNode.Setloopatomic:
                                      LocalBuilder setScratchLocal = RentInt32Local();
                                      EmitCallCharInClass(node.Str!, IsCaseInsensitive(node), setScratchLocal);
                                      ReturnInt32Local(setScratchLocal);
@@ -3705,18 +3705,18 @@ namespace System.Text.RegularExpressions
                  case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl:
                  case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl:
                  case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl:
-                case RegexCode.Oneloopgreedy:
-                case RegexCode.Notoneloopgreedy:
-                case RegexCode.Setloopgreedy:
-                case RegexCode.Oneloopgreedy | RegexCode.Rtl:
-                case RegexCode.Notoneloopgreedy | RegexCode.Rtl:
-                case RegexCode.Setloopgreedy | RegexCode.Rtl:
-                case RegexCode.Oneloopgreedy | RegexCode.Ci:
-                case RegexCode.Notoneloopgreedy | RegexCode.Ci:
-                case RegexCode.Setloopgreedy | RegexCode.Ci:
-                case RegexCode.Oneloopgreedy | RegexCode.Ci | RegexCode.Rtl:
-                case RegexCode.Notoneloopgreedy | RegexCode.Ci | RegexCode.Rtl:
-                case RegexCode.Setloopgreedy | RegexCode.Ci | RegexCode.Rtl:
+                case RegexCode.Oneloopatomic:
+                case RegexCode.Notoneloopatomic:
+                case RegexCode.Setloopatomic:
+                case RegexCode.Oneloopatomic | RegexCode.Rtl:
+                case RegexCode.Notoneloopatomic | RegexCode.Rtl:
+                case RegexCode.Setloopatomic | RegexCode.Rtl:
+                case RegexCode.Oneloopatomic | RegexCode.Ci:
+                case RegexCode.Notoneloopatomic | RegexCode.Ci:
+                case RegexCode.Setloopatomic | RegexCode.Ci:
+                case RegexCode.Oneloopatomic | RegexCode.Ci | RegexCode.Rtl:
+                case RegexCode.Notoneloopatomic | RegexCode.Ci | RegexCode.Rtl:
+                case RegexCode.Setloopatomic | RegexCode.Ci | RegexCode.Rtl:
                      //: int c = Operand(1);
                      //: if (c > Rightchars())
                      //:     c = Rightchars();
@@ -3779,7 +3779,7 @@ namespace System.Text.RegularExpressions
                          Dup();
                          Stloc(cLocal);
                          Ldc(0);
-                        if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopgreedy)
+                        if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
                          {
                              BleFar(l2);
                          }
@@ -3797,7 +3797,7 @@ namespace System.Text.RegularExpressions
                              Rightcharnext();
                          }
  
-                        if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopgreedy)
+                        if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
                          {
                              EmitTimeoutCheck();
                              EmitCallCharInClass(_strings![Operand(0)], IsCaseInsensitive(), charInClassLocal);
@@ -3811,13 +3811,13 @@ namespace System.Text.RegularExpressions
                              }
  
                              Ldc(Operand(0));
-                            if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopgreedy)
+                            if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic)
                              {
                                  Beq(l1);
                              }
                              else
                              {
-                                Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopgreedy);
+                                Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic);
                                  Bne(l1);
                              }
                          }
@@ -3829,7 +3829,7 @@ namespace System.Text.RegularExpressions
  
                          MarkLabel(l2);
  
-                        if (Code() != RegexCode.Oneloopgreedy && Code() != RegexCode.Notoneloopgreedy && Code() != RegexCode.Setloopgreedy)
+                        if (Code() != RegexCode.Oneloopatomic && Code() != RegexCode.Notoneloopatomic && Code() != RegexCode.Setloopatomic)
                          {
                              Ldloc(lenLocal);
                              Ldloc(cLocal);
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs

index 3f595b1..a1d9e0f 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs
@@ -94,14 +94,14 @@ namespace System.Text.RegularExpressions
                          }
                          break;
  
-                    case RegexNode.Greedy:
+                    case RegexNode.Atomic:
                      case RegexNode.Capture:
                          curNode = curNode.Child(0);
                          concatNode = null;
                          continue;
  
                      case RegexNode.Oneloop:
-                    case RegexNode.Oneloopgreedy:
+                    case RegexNode.Oneloopatomic:
                      case RegexNode.Onelazy:
  
                          // In release, cutoff at a length to which we can still reasonably construct a string
@@ -176,7 +176,7 @@ namespace System.Text.RegularExpressions
                          }
                          break;
  
-                    case RegexNode.Greedy:
+                    case RegexNode.Atomic:
                      case RegexNode.Capture:
                          curNode = curNode.Child(0);
                          concatNode = null;
@@ -420,8 +420,8 @@ namespace System.Text.RegularExpressions
                  case RegexNode.Group | AfterChild:
                  case RegexNode.Capture | BeforeChild:
                  case RegexNode.Capture | AfterChild:
-                case RegexNode.Greedy | BeforeChild:
-                case RegexNode.Greedy | AfterChild:
+                case RegexNode.Atomic | BeforeChild:
+                case RegexNode.Atomic | AfterChild:
                      break;
  
                  case RegexNode.Require | BeforeChild:
@@ -440,13 +440,13 @@ namespace System.Text.RegularExpressions
                      break;
  
                  case RegexNode.Oneloop:
-                case RegexNode.Oneloopgreedy:
+                case RegexNode.Oneloopatomic:
                  case RegexNode.Onelazy:
                      PushFC(new RegexFC(node.Ch, false, node.M == 0, ci));
                      break;
  
                  case RegexNode.Notoneloop:
-                case RegexNode.Notoneloopgreedy:
+                case RegexNode.Notoneloopatomic:
                  case RegexNode.Notonelazy:
                      PushFC(new RegexFC(node.Ch, true, node.M == 0, ci));
                      break;
@@ -465,7 +465,7 @@ namespace System.Text.RegularExpressions
                      break;
  
                  case RegexNode.Setloop:
-                case RegexNode.Setloopgreedy:
+                case RegexNode.Setloopatomic:
                  case RegexNode.Setlazy:
                      PushFC(new RegexFC(node.Str!, node.M == 0, ci));
                      break;
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs

index 895a0a1..27f9a38 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
@@ -1133,7 +1133,7 @@ namespace System.Text.RegularExpressions
                          }
  
                      case RegexCode.Oneloop:
-                    case RegexCode.Oneloopgreedy:
+                    case RegexCode.Oneloopatomic:
                          {
                              int c = Operand(1);
  
@@ -1163,7 +1163,7 @@ namespace System.Text.RegularExpressions
                          }
  
                      case RegexCode.Notoneloop:
-                    case RegexCode.Notoneloopgreedy:
+                    case RegexCode.Notoneloopatomic:
                          {
                              int c = Operand(1);
  
@@ -1193,7 +1193,7 @@ namespace System.Text.RegularExpressions
                          }
  
                      case RegexCode.Setloop:
-                    case RegexCode.Setloopgreedy:
+                    case RegexCode.Setloopatomic:
                          {
                              int c = Operand(1);
  
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs

index f3b665c..36476ae 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
@@ -77,9 +77,9 @@ namespace System.Text.RegularExpressions
          public const int EndZ = RegexCode.EndZ;                       //          \Z
          public const int End = RegexCode.End;                         //          \z
  
-        public const int Oneloopgreedy = RegexCode.Oneloopgreedy;        // c,n      (?> a*)
-        public const int Notoneloopgreedy = RegexCode.Notoneloopgreedy;  // c,n      (?> .*)
-        public const int Setloopgreedy = RegexCode.Setloopgreedy;        // set,n    (?> \d*)
+        public const int Oneloopatomic = RegexCode.Oneloopatomic;        // c,n      (?> a*)
+        public const int Notoneloopatomic = RegexCode.Notoneloopatomic;  // c,n      (?> .*)
+        public const int Setloopatomic = RegexCode.Setloopatomic;        // set,n    (?> \d*)
  
          // Interior nodes do not correspond to primitive operations, but
          // control structures compositing other operations
@@ -99,7 +99,7 @@ namespace System.Text.RegularExpressions
          public const int Group = 29;                                  //          (?:)       - noncapturing group
          public const int Require = 30;                                //          (?=) (?<=) - lookahead and lookbehind assertions
          public const int Prevent = 31;                                //          (?!) (?<!) - negative lookahead and lookbehind assertions
-        public const int Greedy = 32;                                 //          (?>)       - greedy subexpression
+        public const int Atomic = 32;                                 //          (?>)       - atomic subexpression
          public const int Testref = 33;                                //          (?(n) | )  - alternation, reference
          public const int Testgroup = 34;                              //          (?(...) | )- alternation, expression
  
@@ -189,15 +189,15 @@ namespace System.Text.RegularExpressions
                      switch (node.Type)
                      {
                          case Oneloop:
-                            node.Type = Oneloopgreedy;
+                            node.Type = Oneloopatomic;
                              break;
  
                          case Notoneloop:
-                            node.Type = Notoneloopgreedy;
+                            node.Type = Notoneloopatomic;
                              break;
  
                          case Setloop:
-                            node.Type = Setloopgreedy;
+                            node.Type = Setloopatomic;
                              break;
  
                          case Capture:
@@ -212,14 +212,14 @@ namespace System.Text.RegularExpressions
                                  case Alternate:
                                  case Loop:
                                  case Lazyloop:
-                                    var greedy = new RegexNode(Greedy, Options);
-                                    greedy.AddChild(existingChild);
-                                    node.ReplaceChild(node.ChildCount() - 1, greedy);
+                                    var atomic = new RegexNode(Atomic, Options);
+                                    atomic.AddChild(existingChild);
+                                    node.ReplaceChild(node.ChildCount() - 1, atomic);
                                      break;
                              }
                              continue;
  
-                        case Greedy:
+                        case Atomic:
                              node = node.Child(0);
                              continue;
                      }
@@ -254,8 +254,8 @@ namespace System.Text.RegularExpressions
                      n = ReduceLoops();
                      break;
  
-                case Greedy:
-                    n = ReduceGreedy();
+                case Atomic:
+                    n = ReduceAtomic();
                      break;
  
                  case Group:
@@ -306,33 +306,32 @@ namespace System.Text.RegularExpressions
          }
  
          /// <summary>
-        /// Simple optimization. If a greedy subexpression contains only a set loop
-        /// or a one loop, change them to be a greedy set loop or greedy one loop,
-        /// and remove the greedy node.
+        /// Simple optimization. If an atomic subexpression contains only a one/notone/set loop,
+        /// change it to be an atomic one/notone/set loop and remove the atomic node.
          /// </summary>
-        private RegexNode ReduceGreedy()
+        private RegexNode ReduceAtomic()
          {
-            Debug.Assert(Type == Greedy);
+            Debug.Assert(Type == Atomic);
              Debug.Assert(ChildCount() == 1);
  
              RegexNode child = Child(0);
              switch (child.Type)
              {
                  case Oneloop:
-                    child.Type = Oneloopgreedy;
+                    child.Type = Oneloopatomic;
                      return child;
  
                  case Notoneloop:
-                    child.Type = Notoneloopgreedy;
+                    child.Type = Notoneloopatomic;
                      return child;
  
                  case Setloop:
-                    child.Type = Setloopgreedy;
+                    child.Type = Setloopatomic;
                      return child;
  
-                case Oneloopgreedy:
-                case Notoneloopgreedy:
-                case Setloopgreedy:
+                case Oneloopatomic:
+                case Notoneloopatomic:
+                case Setloopatomic:
                      return child;
              }
  
@@ -367,11 +366,11 @@ namespace System.Text.RegularExpressions
                          switch (child.Type)
                          {
                              case Oneloop:
-                            case Oneloopgreedy:
+                            case Oneloopatomic:
                              case Notoneloop:
-                            case Notoneloopgreedy:
+                            case Notoneloopatomic:
                              case Setloop:
-                            case Setloopgreedy:
+                            case Setloopatomic:
                                  valid = true;
                                  break;
                          }
@@ -699,10 +698,10 @@ namespace System.Text.RegularExpressions
                  children.RemoveRange(j, i - j);
              }
  
-            // Now try to convert as many loops as possible to be greedy to avoid unnecessary backtracking.
+            // Now try to convert as many loops as possible to be atomic to avoid unnecessary backtracking.
              if ((Options & RegexOptions.RightToLeft) == 0)
              {
-                ReduceConcatenateWithAutoGreedy();
+                ReduceConcatenateWithAutoAtomic();
              }
  
              // If the concatenation is now empty, return an empty node, or if it's got a single child, return that child.
@@ -712,11 +711,11 @@ namespace System.Text.RegularExpressions
  
          /// <summary>
          /// Finds oneloop and setloop nodes in the concatenation that can be automatically upgraded
-        /// to oneloopgreedy and setloopgreedy nodes.  Such changes avoid potential useless backtracking.
+        /// to oneloopatomic and setloopatomic nodes.  Such changes avoid potential useless backtracking.
          /// This looks for cases like A*B, where A and B are known to not overlap: in such cases,
          /// we can effectively convert this to (?>A*)B.
          /// </summary>
-        private void ReduceConcatenateWithAutoGreedy()
+        private void ReduceConcatenateWithAutoAtomic()
          {
              Debug.Assert(Type == Concatenate);
              Debug.Assert((Options & RegexOptions.RightToLeft) == 0);
@@ -743,7 +742,7 @@ namespace System.Text.RegularExpressions
                      switch (subsequent.Type)
                      {
                          case Capture:
-                        case Greedy:
+                        case Atomic:
                          case Require:
                          case Concatenate:
                          case Loop when subsequent.M > 0:
@@ -764,7 +763,7 @@ namespace System.Text.RegularExpressions
                  }
  
                  // If this node is a one/notone/setloop, see if it overlaps with its successor in the concatenation.
-                // If it doesn't, then we can upgrade it to being a one/notone/setloopgreedy.
+                // If it doesn't, then we can upgrade it to being a one/notone/setloopatomic.
                  // Doing so avoids unnecessary backtracking.
                  switch (node.Type)
                  {
@@ -774,16 +773,16 @@ namespace System.Text.RegularExpressions
                              case One when node.Ch != subsequent.Ch:
                              case Onelazy when subsequent.M > 0 && node.Ch != subsequent.Ch:
                              case Oneloop when subsequent.M > 0 && node.Ch != subsequent.Ch:
-                            case Oneloopgreedy when subsequent.M > 0 && node.Ch != subsequent.Ch:
+                            case Oneloopatomic when subsequent.M > 0 && node.Ch != subsequent.Ch:
                              case Notone when node.Ch == subsequent.Ch:
                              case Notonelazy when subsequent.M > 0 && node.Ch == subsequent.Ch:
                              case Notoneloop when subsequent.M > 0 && node.Ch == subsequent.Ch:
-                            case Notoneloopgreedy when subsequent.M > 0 && node.Ch == subsequent.Ch:
+                            case Notoneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch:
                              case Multi when node.Ch != subsequent.Str![0]:
                              case Set when !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
                              case Setlazy when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
                              case Setloop when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
-                            case Setloopgreedy when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
+                            case Setloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
                              case End:
                              case EndZ when node.Ch != '\n':
                              case Eol when node.Ch != '\n':
@@ -791,7 +790,7 @@ namespace System.Text.RegularExpressions
                              case Nonboundary when !RegexCharClass.IsWordChar(node.Ch):
                              case ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch):
                              case NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch):
-                                node.Type = Oneloopgreedy;
+                                node.Type = Oneloopatomic;
                                  break;
                          }
                          break;
@@ -802,10 +801,10 @@ namespace System.Text.RegularExpressions
                              case One when node.Ch == subsequent.Ch:
                              case Onelazy when subsequent.M > 0 && node.Ch == subsequent.Ch:
                              case Oneloop when subsequent.M > 0 && node.Ch == subsequent.Ch:
-                            case Oneloopgreedy when subsequent.M > 0 && node.Ch == subsequent.Ch:
+                            case Oneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch:
                              case Multi when node.Ch == subsequent.Str![0]:
                              case End:
-                                node.Type = Notoneloopgreedy;
+                                node.Type = Notoneloopatomic;
                                  break;
                          }
                          break;
@@ -816,16 +815,16 @@ namespace System.Text.RegularExpressions
                              case One when !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                              case Onelazy when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                              case Oneloop when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
-                            case Oneloopgreedy when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
+                            case Oneloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                              case Notone when RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                              case Notonelazy when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                              case Notoneloop when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
-                            case Notoneloopgreedy when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
+                            case Notoneloopatomic when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
                              case Multi when !RegexCharClass.CharInClass(subsequent.Str![0], node.Str!):
                              case Set when !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
                              case Setlazy when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
                              case Setloop when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
-                            case Setloopgreedy when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
+                            case Setloopatomic when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
                              case End:
                              case EndZ when !RegexCharClass.CharInClass('\n', node.Str!):
                              case Eol when !RegexCharClass.CharInClass('\n', node.Str!):
@@ -833,7 +832,7 @@ namespace System.Text.RegularExpressions
                              case Nonboundary when node.Str == RegexCharClass.NotWordClass || node.Str == RegexCharClass.NotDigitClass:
                              case ECMABoundary when node.Str == RegexCharClass.ECMAWordClass || node.Str == RegexCharClass.ECMADigitClass:
                              case NonECMABoundary when node.Str == RegexCharClass.NotECMAWordClass || node.Str == RegexCharClass.NotDigitClass:
-                                node.Type = Setloopgreedy;
+                                node.Type = Setloopatomic;
                                  break;
                          }
                          break;
@@ -937,11 +936,11 @@ namespace System.Text.RegularExpressions
              "Nothing", "Empty",
              "Alternate", "Concatenate",
              "Loop", "Lazyloop",
-            "Capture", "Group", "Require", "Prevent", "Greedy",
+            "Capture", "Group", "Require", "Prevent", "Atomic",
              "Testref", "Testgroup",
              "", "", "", "", "", "",
              "ECMABoundary", "NonECMABoundary",
-            "Oneloopgreedy", "Notoneloopgreedy", "Setloopgreedy",
+            "Oneloopatomic", "Notoneloopatomic", "Setloopatomic",
          };
  
          public string Description()
@@ -968,9 +967,9 @@ namespace System.Text.RegularExpressions
              switch (Type)
              {
                  case Oneloop:
-                case Oneloopgreedy:
+                case Oneloopatomic:
                  case Notoneloop:
-                case Notoneloopgreedy:
+                case Notoneloopatomic:
                  case Onelazy:
                  case Notonelazy:
                  case One:
@@ -989,7 +988,7 @@ namespace System.Text.RegularExpressions
                      break;
                  case Set:
                  case Setloop:
-                case Setloopgreedy:
+                case Setloopatomic:
                  case Setlazy:
                      argSb.Append("(Set = " + RegexCharClass.SetDescription(Str!) + ")");
                      break;
@@ -998,13 +997,13 @@ namespace System.Text.RegularExpressions
              switch (Type)
              {
                  case Oneloop:
-                case Oneloopgreedy:
+                case Oneloopatomic:
                  case Notoneloop:
-                case Notoneloopgreedy:
+                case Notoneloopatomic:
                  case Onelazy:
                  case Notonelazy:
                  case Setloop:
-                case Setloopgreedy:
+                case Setloopatomic:
                  case Setlazy:
                  case Loop:
                  case Lazyloop:
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs

index 171cb65..393a468 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
@@ -802,8 +802,8 @@ namespace System.Text.RegularExpressions
                          break;
  
                      case '>':
-                        // greedy subexpression
-                        nodeType = RegexNode.Greedy;
+                        // atomic subexpression
+                        nodeType = RegexNode.Atomic;
                          break;
  
                      case '\'':
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs

index b8f30b5..9dd8d40 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
@@ -428,11 +428,11 @@ namespace System.Text.RegularExpressions
                      Emit(RegexCode.Forejump);
                      break;
  
-                case RegexNode.Greedy | BeforeChild:
+                case RegexNode.Atomic | BeforeChild:
                      Emit(RegexCode.Setjump);
                      break;
  
-                case RegexNode.Greedy | AfterChild:
+                case RegexNode.Atomic | AfterChild:
                      Emit(RegexCode.Forejump);
                      break;
  
@@ -442,14 +442,14 @@ namespace System.Text.RegularExpressions
                      break;
  
                  case RegexNode.Notoneloop:
-                case RegexNode.Notoneloopgreedy:
+                case RegexNode.Notoneloopatomic:
                  case RegexNode.Notonelazy:
                  case RegexNode.Oneloop:
-                case RegexNode.Oneloopgreedy:
+                case RegexNode.Oneloopatomic:
                  case RegexNode.Onelazy:
                      if (node.M > 0)
                      {
-                        Emit(((node.Type == RegexNode.Oneloop || node.Type == RegexNode.Oneloopgreedy || node.Type == RegexNode.Onelazy) ?
+                        Emit(((node.Type == RegexNode.Oneloop || node.Type == RegexNode.Oneloopatomic || node.Type == RegexNode.Onelazy) ?
                                RegexCode.Onerep : RegexCode.Notonerep) | bits, node.Ch, node.M);
                      }
                      if (node.N > node.M)
@@ -459,7 +459,7 @@ namespace System.Text.RegularExpressions
                      break;
  
                  case RegexNode.Setloop:
-                case RegexNode.Setloopgreedy:
+                case RegexNode.Setloopatomic:
                  case RegexNode.Setlazy:
                      {
                          int stringCode = StringCode(node.Str!);
diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs

index 13a7c26..9a4ca22 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs
@@ -642,8 +642,8 @@ namespace System.Text.RegularExpressions.Tests
              yield return new object[] { null, @"(cat){5,dog}?", "cat{5,dog}?", RegexOptions.None, new string[] { "cat{5,dog}", "cat" } };
              yield return new object[] { null, @"(cat){cat,dog}?", "cat{cat,dog}?", RegexOptions.None, new string[] { "cat{cat,dog}", "cat" } };
  
-            // Atomic ("greedy") subexpressions
-            // Implicitly upgrading oneloop to be greedy
+            // Atomic subexpressions
+            // Implicitly upgrading oneloop to be atomic
              yield return new object[] { null, @"a*", "aaa", RegexOptions.None, new string[] { "aaa" } };
              yield return new object[] { null, @"a*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
              yield return new object[] { null, @"a*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
@@ -665,14 +665,14 @@ namespace System.Text.RegularExpressions.Tests
              yield return new object[] { null, @"a*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } };
              yield return new object[] { null, @"@*\B", "@@@", RegexOptions.None, new string[] { "@@@" } };
              yield return new object[] { null, @"@*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } };
-            // Implicitly upgrading notoneloop to be greedy
+            // Implicitly upgrading notoneloop to be atomic
              yield return new object[] { null, @"[^b]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
              yield return new object[] { null, @"[^b]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
              yield return new object[] { null, @"[^b]*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
              yield return new object[] { null, @"[^b]*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
              yield return new object[] { null, @"[^b]*bac", "aaabac", RegexOptions.None, new string[] { "aaabac" } };
              yield return new object[] { null, @"[^b]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
-            // Implicitly upgrading setloop to be greedy
+            // Implicitly upgrading setloop to be atomic
              yield return new object[] { null, @"[ac]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
              yield return new object[] { null, @"[ac]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
              yield return new object[] { null, @"[ac]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
@@ -694,7 +694,7 @@ namespace System.Text.RegularExpressions.Tests
              yield return new object[] { null, @"[ac]*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } };
              yield return new object[] { null, @"[@']*\B", "@@@", RegexOptions.None, new string[] { "@@@" } };
              yield return new object[] { null, @"[@']*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } };
-            // Implicitly upgrading nested loops to be greedy
+            // Implicitly upgrading nested loops to be atomic
              yield return new object[] { null, @"(?:a){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } };
              yield return new object[] { null, @"(?:a){3}?", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } };
              yield return new object[] { null, @"(?:a{2}){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaaaaa" } };
author	Stephen Toub <stoub@microsoft.com>
	Mon, 6 Jan 2020 21:43:05 +0000 (16:43 -0500)
committer	Stephen Toub <stoub@microsoft.com>
	Thu, 9 Jan 2020 03:50:10 +0000 (22:50 -0500)
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs		patch | blob | history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs		patch | blob | history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs		patch | blob | history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs		patch | blob | history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs		patch | blob | history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs		patch | blob | history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs		patch | blob | history
src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs		patch | blob | history