Imported Upstream version 0.19.7

[platform/upstream/gettext.git] / gettext-tools / gnulib-lib / libxml / xmlregexp.c
diff --git a/gettext-tools/gnulib-lib/libxml/xmlregexp.c b/gettext-tools/gnulib-lib/libxml/xmlregexp.c

index e7d519e..3e912ab 100644 (file)
--- a/gettext-tools/gnulib-lib/libxml/xmlregexp.c
+++ b/gettext-tools/gnulib-lib/libxml/xmlregexp.c
@@ -1,7 +1,7 @@
  /*
   * regexp.c: generic and extensible Regular Expression engine
   *
- * Basically designed with the purpose of compiling regexps for 
+ * Basically designed with the purpose of compiling regexps for
   * the variety of validation/shemas mechanisms now available in
   * XML related specifications these include:
   *    - XML-1.0 DTD validation
@@ -44,6 +44,9 @@
  
  #define MAX_PUSH 10000000
  
+#ifdef ERROR
+#undef ERROR
+#endif
  #define ERROR(str)                                                     \
      ctxt->error = XML_REGEXP_COMPILE_ERROR;                            \
      xmlRegexpErrCompile(ctxt, str);
@@ -54,21 +57,26 @@
  #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
  #define NEXTL(l) ctxt->cur += l;
  #define XML_REG_STRING_SEPARATOR '|'
+/*
+ * Need PREV to check on a '-' within a Character Group. May only be used
+ * when it's guaranteed that cur is not at the beginning of ctxt->string!
+ */
+#define PREV (ctxt->cur[-1])
  
  /**
   * TODO:
   *
   * macro to flag unimplemented blocks
   */
-#define TODO                                                           \
+#define TODO                                                           \
      xmlGenericError(xmlGenericErrorContext,                            \
             "Unimplemented block at %s:%d\n",                           \
              __FILE__, __LINE__);
  
  /************************************************************************
- *                                                                     *
- *                     Datatypes and structures                        *
- *                                                                     *
+ *                                                                     *
+ *                     Datatypes and structures                        *
+ *                                                                     *
   ************************************************************************/
  
  /*
@@ -145,7 +153,8 @@ typedef enum {
      XML_REGEXP_START_STATE = 1,
      XML_REGEXP_FINAL_STATE,
      XML_REGEXP_TRANS_STATE,
-    XML_REGEXP_SINK_STATE
+    XML_REGEXP_SINK_STATE,
+    XML_REGEXP_UNREACH_STATE
  } xmlRegStateType;
  
  typedef enum {
@@ -183,6 +192,7 @@ struct _xmlRegAtom {
      int neg;
      int codepoint;
      xmlRegStatePtr start;
+    xmlRegStatePtr start0;
      xmlRegStatePtr stop;
      int maxRanges;
      int nbRanges;
@@ -212,6 +222,7 @@ struct _xmlRegTrans {
  struct _xmlAutomataState {
      xmlRegStateType type;
      xmlRegMarkedType mark;
+    xmlRegMarkedType markd;
      xmlRegMarkedType reached;
      int no;
      int maxTrans;
@@ -226,6 +237,8 @@ struct _xmlAutomataState {
  typedef struct _xmlAutomata xmlRegParserCtxt;
  typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
  
+#define AM_AUTOMATA_RNG 1
+
  struct _xmlAutomata {
      xmlChar *string;
      xmlChar *cur;
@@ -253,6 +266,7 @@ struct _xmlAutomata {
  
      int determinist;
      int negs;
+    int flags;
  };
  
  struct _xmlRegexp {
@@ -264,6 +278,7 @@ struct _xmlRegexp {
      int nbCounters;
      xmlRegCounter *counters;
      int determinist;
+    int flags;
      /*
       * That's the compact form for determinists automatas
       */
@@ -346,9 +361,11 @@ static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
  static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
                    int neg, int start, int end, const xmlChar *blockName);
  
+void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
+
  /************************************************************************
   *                                                                     *
- *             Regexp memory error handler                             *
+ *             Regexp memory error handler                             *
   *                                                                     *
   ************************************************************************/
  /**
@@ -395,9 +412,9 @@ xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
  }
  
  /************************************************************************
- *                                                                     *
- *                     Allocation/Deallocation                         *
- *                                                                     *
+ *                                                                     *
+ *                     Allocation/Deallocation                         *
+ *                                                                     *
   ************************************************************************/
  
  static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
@@ -427,6 +444,7 @@ xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
      ret->nbCounters = ctxt->nbCounters;
      ret->counters = ctxt->counters;
      ret->determinist = ctxt->determinist;
+    ret->flags = ctxt->flags;
      if (ret->determinist == -1) {
          xmlRegexpIsDeterminist(ret);
      }
@@ -727,11 +745,41 @@ xmlRegFreeRange(xmlRegRangePtr range) {
  }
  
  /**
+ * xmlRegCopyRange:
+ * @range:  the regexp range
+ *
+ * Copy a regexp range
+ *
+ * Returns the new copy or NULL in case of error.
+ */
+static xmlRegRangePtr
+xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
+    xmlRegRangePtr ret;
+
+    if (range == NULL)
+       return(NULL);
+
+    ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
+                         range->end);
+    if (ret == NULL)
+        return(NULL);
+    if (range->blockName != NULL) {
+       ret->blockName = xmlStrdup(range->blockName);
+       if (ret->blockName == NULL) {
+           xmlRegexpErrMemory(ctxt, "allocating range");
+           xmlRegFreeRange(ret);
+           return(NULL);
+       }
+    }
+    return(ret);
+}
+
+/**
   * xmlRegNewAtom:
   * @ctxt:  the regexp parser context
   * @type:  the type of atom
   *
- * Allocate a new regexp range
+ * Allocate a new atom
   *
   * Returns the new atom or NULL in case of error
   */
@@ -778,6 +826,52 @@ xmlRegFreeAtom(xmlRegAtomPtr atom) {
      xmlFree(atom);
  }
  
+/**
+ * xmlRegCopyAtom:
+ * @ctxt:  the regexp parser context
+ * @atom:  the oiginal atom
+ *
+ * Allocate a new regexp range
+ *
+ * Returns the new atom or NULL in case of error
+ */
+static xmlRegAtomPtr
+xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
+    xmlRegAtomPtr ret;
+
+    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
+    if (ret == NULL) {
+       xmlRegexpErrMemory(ctxt, "copying atom");
+       return(NULL);
+    }
+    memset(ret, 0, sizeof(xmlRegAtom));
+    ret->type = atom->type;
+    ret->quant = atom->quant;
+    ret->min = atom->min;
+    ret->max = atom->max;
+    if (atom->nbRanges > 0) {
+        int i;
+
+        ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
+                                                  atom->nbRanges);
+       if (ret->ranges == NULL) {
+           xmlRegexpErrMemory(ctxt, "copying atom");
+           goto error;
+       }
+       for (i = 0;i < atom->nbRanges;i++) {
+           ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
+           if (ret->ranges[i] == NULL)
+               goto error;
+           ret->nbRanges = i + 1;
+       }
+    }
+    return(ret);
+
+error:
+    xmlRegFreeAtom(ret);
+    return(NULL);
+}
+
  static xmlRegStatePtr
  xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
      xmlRegStatePtr ret;
@@ -841,9 +935,9 @@ xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
  }
  
  /************************************************************************
- *                                                                     *
- *                     Display of Data structures                      *
- *                                                                     *
+ *                                                                     *
+ *                     Display of Data structures                      *
+ *                                                                     *
   ************************************************************************/
  
  static void
@@ -1050,7 +1144,7 @@ xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
         fprintf(output, "char %c ", trans->atom->codepoint);
      fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
  }
-    
+
  static void
  xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
      int i;
@@ -1064,7 +1158,7 @@ xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
         fprintf(output, "START ");
      if (state->type == XML_REGEXP_FINAL_STATE)
         fprintf(output, "FINAL ");
-    
+
      fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
      for (i = 0;i < state->nbTrans; i++) {
         xmlRegPrintTrans(output, &(state->trans[i]));
@@ -1114,12 +1208,12 @@ xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
  #endif
  
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *              Finite Automata structures manipulations               *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  
-static void 
+static void
  xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
                    int neg, xmlRegAtomType type, int start, int end,
                    xmlChar *blockName) {
@@ -1159,7 +1253,7 @@ xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
         return;
      range->blockName = blockName;
      atom->ranges[atom->nbRanges++] = range;
-    
+
  }
  
  static int
@@ -1190,7 +1284,7 @@ xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
      return(ctxt->nbCounters++);
  }
  
-static int 
+static int
  xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
      if (atom == NULL) {
         ERROR("atom push: atom is NULL");
@@ -1222,7 +1316,7 @@ xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
      return(0);
  }
  
-static void 
+static void
  xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
                        int from) {
      if (target->maxTransTo == 0) {
@@ -1250,7 +1344,7 @@ xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
      target->nbTransTo++;
  }
  
-static void 
+static void
  xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
                     xmlRegAtomPtr atom, xmlRegStatePtr target,
                     int counter, int count) {
@@ -1316,7 +1410,7 @@ xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
         printf("counted %d\n", counter);
      else if (atom == NULL)
         printf("epsilon transition\n");
-    else if (atom != NULL) 
+    else if (atom != NULL)
          xmlRegPrintAtom(stdout, atom);
  #endif
  
@@ -1449,6 +1543,8 @@ xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
  static int
  xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
                          xmlRegStatePtr to, xmlRegAtomPtr atom) {
+    xmlRegStatePtr end;
+
      if (atom == NULL) {
         ERROR("genrate transition: atom == NULL");
         return(-1);
@@ -1468,7 +1564,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
              */
             xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
  #ifdef DV
-       } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && 
+       } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
                    (atom->quant != XML_REGEXP_QUANT_ONCE)) {
             to = xmlRegNewState(ctxt);
             xmlRegStatePush(ctxt, to);
@@ -1482,10 +1578,15 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
                 /*
                  * transition done to the state after end of atom.
                  *      1. set transition from atom start to new state
-                *      2. set transition from atom end to this state. 
+                *      2. set transition from atom end to this state.
                  */
-               xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
-               xmlFAGenerateEpsilonTransition(ctxt, atom->stop, ctxt->state);
+                if (to == NULL) {
+                    xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
+                    xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
+                                                   ctxt->state);
+                } else {
+                    xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
+                }
                 break;
             case XML_REGEXP_QUANT_MULT:
                 atom->quant = XML_REGEXP_QUANT_ONCE;
@@ -1498,58 +1599,89 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
                 break;
             case XML_REGEXP_QUANT_RANGE: {
                 int counter;
-               xmlRegStatePtr newstate;
+               xmlRegStatePtr inter, newstate;
  
                 /*
-                * This one is nasty:
-                *   1/ if range has minOccurs == 0, create a new state
-                *      and create epsilon transitions from atom->start
-                *      to atom->stop, as well as atom->start to the new
-                *      state
-                *   2/ register a new counter
-                *   3/ register an epsilon transition associated to
-                *      this counter going from atom->stop to atom->start
-                *   4/ create a new state
-                *   5/ generate a counted transition from atom->stop to
-                *      that state
+                * create the final state now if needed
                  */
-               if (atom->min == 0) {
-                   xmlFAGenerateEpsilonTransition(ctxt, atom->start,
-                       atom->stop);
+               if (to != NULL) {
+                   newstate = to;
+               } else {
                     newstate = xmlRegNewState(ctxt);
                     xmlRegStatePush(ctxt, newstate);
-                   ctxt->state = newstate;
+               }
+
+               /*
+                * The principle here is to use counted transition
+                * to avoid explosion in the number of states in the
+                * graph. This is clearly more complex but should not
+                * be exploitable at runtime.
+                */
+               if ((atom->min == 0) && (atom->start0 == NULL)) {
+                   xmlRegAtomPtr copy;
+                   /*
+                    * duplicate a transition based on atom to count next
+                    * occurences after 1. We cannot loop to atom->start
+                    * directly because we need an epsilon transition to
+                    * newstate.
+                    */
+                    /* ???? For some reason it seems we never reach that
+                       case, I suppose this got optimized out before when
+                       building the automata */
+                   copy = xmlRegCopyAtom(ctxt, atom);
+                   if (copy == NULL)
+                       return(-1);
+                   copy->quant = XML_REGEXP_QUANT_ONCE;
+                   copy->min = 0;
+                   copy->max = 0;
+
+                   if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
+                       < 0)
+                       return(-1);
+                   inter = ctxt->state;
+                   counter = xmlRegGetCounter(ctxt);
+                   ctxt->counters[counter].min = atom->min - 1;
+                   ctxt->counters[counter].max = atom->max - 1;
+                   /* count the number of times we see it again */
+                   xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
+                                                  atom->stop, counter);
+                   /* allow a way out based on the count */
+                   xmlFAGenerateCountedTransition(ctxt, inter,
+                                                  newstate, counter);
+                   /* and also allow a direct exit for 0 */
                     xmlFAGenerateEpsilonTransition(ctxt, atom->start,
-                       newstate);
+                                                  newstate);
+               } else {
+                   /*
+                    * either we need the atom at least once or there
+                    * is an atom->start0 allowing to easilly plug the
+                    * epsilon transition.
+                    */
+                   counter = xmlRegGetCounter(ctxt);
+                   ctxt->counters[counter].min = atom->min - 1;
+                   ctxt->counters[counter].max = atom->max - 1;
+                   /* count the number of times we see it again */
+                   xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
+                                                  atom->start, counter);
+                   /* allow a way out based on the count */
+                   xmlFAGenerateCountedTransition(ctxt, atom->stop,
+                                                  newstate, counter);
+                   /* and if needed allow a direct exit for 0 */
+                   if (atom->min == 0)
+                       xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
+                                                      newstate);
+
                 }
-               counter = xmlRegGetCounter(ctxt);
-               ctxt->counters[counter].min = atom->min - 1;
-               ctxt->counters[counter].max = atom->max - 1;
                 atom->min = 0;
                 atom->max = 0;
                 atom->quant = XML_REGEXP_QUANT_ONCE;
-               if (to != NULL) {
-                   newstate = to;
-               } else {
-                   newstate = xmlRegNewState(ctxt);
-                   xmlRegStatePush(ctxt, newstate);
-               }
                 ctxt->state = newstate;
-               xmlFAGenerateCountedTransition(ctxt, atom->stop,
-                                              newstate, counter);
-                
-                /*
-                * first check count and if OK jump forward; 
-                 * if checking fail increment count and jump back
-                */
-               xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
-                                                     atom->start, counter);
             }
             default:
                 break;
         }
         return(0);
-    } 
+    }
      if ((atom->min == 0) && (atom->max == 0) &&
                 (atom->quant == XML_REGEXP_QUANT_RANGE)) {
          /*
@@ -1576,11 +1708,30 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
             return(-1);
         }
      }
+    end = to;
+    if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
+        (atom->quant == XML_REGEXP_QUANT_PLUS)) {
+       /*
+        * Do not pollute the target state by adding transitions from
+        * it as it is likely to be the shared target of multiple branches.
+        * So isolate with an epsilon transition.
+        */
+        xmlRegStatePtr tmp;
+
+       tmp = xmlRegNewState(ctxt);
+       if (tmp != NULL)
+           xmlRegStatePush(ctxt, tmp);
+       else {
+           return(-1);
+       }
+       xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
+       to = tmp;
+    }
      if (xmlRegAtomPush(ctxt, atom) < 0) {
         return(-1);
      }
      xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
-    ctxt->state = to;
+    ctxt->state = end;
      switch (atom->quant) {
         case XML_REGEXP_QUANT_OPT:
             atom->quant = XML_REGEXP_QUANT_ONCE;
@@ -1595,6 +1746,13 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
             atom->quant = XML_REGEXP_QUANT_ONCE;
             xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
             break;
+       case XML_REGEXP_QUANT_RANGE:
+#if DV_test
+           if (atom->min == 0) {
+               xmlFAGenerateEpsilonTransition(ctxt, from, to);
+           }
+#endif
+           break;
         default:
             break;
      }
@@ -1605,7 +1763,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
   * xmlFAReduceEpsilonTransitions:
   * @ctxt:  a regexp parser context
   * @fromnr:  the from state
- * @tonr:  the to state 
+ * @tonr:  the to state
   * @counter:  should that transition be associated to a counted
   *
   */
@@ -1649,7 +1807,7 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
                     int newto = to->trans[transnr].to;
  
                     xmlRegStateAddTrans(ctxt, from, NULL,
-                                       ctxt->states[newto], 
+                                       ctxt->states[newto],
                                         -1, to->trans[transnr].count);
                 } else {
  #ifdef DEBUG_REGEXP_GRAPH
@@ -1671,11 +1829,11 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
             int newto = to->trans[transnr].to;
  
             if (to->trans[transnr].counter >= 0) {
-               xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
-                                   ctxt->states[newto], 
+               xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
+                                   ctxt->states[newto],
                                     to->trans[transnr].counter, -1);
             } else {
-               xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
+               xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
                                     ctxt->states[newto], counter, -1);
             }
         }
@@ -1687,7 +1845,7 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
   * xmlFAEliminateSimpleEpsilonTransitions:
   * @ctxt:  a regexp parser context
   *
- * Eliminating general epsilon transitions can get costly in the general 
+ * Eliminating general epsilon transitions can get costly in the general
   * algorithm due to the large amount of generated new transitions and
   * associated comparisons. However for simple epsilon transition used just
   * to separate building blocks when generating the automata this can be
@@ -1709,6 +1867,8 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
             continue;
         if (state->nbTrans != 1)
             continue;
+       if (state->type == XML_REGEXP_UNREACH_STATE)
+           continue;
         /* is the only transition out a basic transition */
         if ((state->trans[0].atom == NULL) &&
             (state->trans[0].to >= 0) &&
@@ -1721,48 +1881,37 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
  #ifdef DEBUG_REGEXP_GRAPH
                 printf("Found simple epsilon trans from start %d to %d\n",
                        statenr, newto);
-#endif     
+#endif
              } else {
  #ifdef DEBUG_REGEXP_GRAPH
                 printf("Found simple epsilon trans from %d to %d\n",
                        statenr, newto);
-#endif     
+#endif
                 for (i = 0;i < state->nbTransTo;i++) {
                     tmp = ctxt->states[state->transTo[i]];
                     for (j = 0;j < tmp->nbTrans;j++) {
                         if (tmp->trans[j].to == statenr) {
-                           tmp->trans[j].to = newto;
-#ifdef DEBUG_REGEXP_GRAPH
-                           printf("Changed transition %d on %d to go to %d\n",
-                                  j, tmp->no, newto);
-#endif     
-                            xmlRegStateAddTransTo(ctxt, ctxt->states[newto],
-                                                 tmp->no);
-                       }
-                   }
-               }
-#if 0
-               for (i = 0;i < ctxt->nbStates;i++) {
-                   tmp = ctxt->states[i];
-                   for (j = 0;j < tmp->nbTrans;j++) {
-                       if (tmp->trans[j].to == statenr) {
-                           tmp->trans[j].to = newto;
  #ifdef DEBUG_REGEXP_GRAPH
                             printf("Changed transition %d on %d to go to %d\n",
                                    j, tmp->no, newto);
-#endif     
+#endif
+                           tmp->trans[j].to = -1;
+                           xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
+                                               ctxt->states[newto],
+                                               tmp->trans[j].counter,
+                                               tmp->trans[j].count);
                         }
                     }
                 }
-#endif
                 if (state->type == XML_REGEXP_FINAL_STATE)
                     ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
                 /* eliminate the transition completely */
                 state->nbTrans = 0;
  
+                state->type = XML_REGEXP_UNREACH_STATE;
  
             }
-            
+
         }
      }
  }
@@ -1779,16 +1928,33 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
  
      if (ctxt->states == NULL) return;
  
+    /*
+     * Eliminate simple epsilon transition and the associated unreachable
+     * states.
+     */
      xmlFAEliminateSimpleEpsilonTransitions(ctxt);
+    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+       state = ctxt->states[statenr];
+       if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
+#ifdef DEBUG_REGEXP_GRAPH
+           printf("Removed unreachable state %d\n", statenr);
+#endif
+           xmlRegFreeState(state);
+           ctxt->states[statenr] = NULL;
+       }
+    }
  
      has_epsilon = 0;
  
      /*
-     * build the completed transitions bypassing the epsilons
+     * Build the completed transitions bypassing the epsilons
       * Use a marking algorithm to avoid loops
-     * mark sink states too.
+     * Mark sink states too.
+     * Process from the latests states backward to the start when
+     * there is long cascading epsilon chains this minimize the
+     * recursions and transition compares when adding the new ones
       */
-    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+    for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
         state = ctxt->states[statenr];
         if (state == NULL)
             continue;
@@ -1812,8 +1978,9 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
                     printf("Found epsilon trans %d from %d to %d\n",
                            transnr, statenr, newto);
  #endif
-                   state->mark = XML_REGEXP_MARK_START;
                     has_epsilon = 1;
+                   state->trans[transnr].to = -2;
+                   state->mark = XML_REGEXP_MARK_START;
                     xmlFAReduceEpsilonTransitions(ctxt, statenr,
                                       newto, state->trans[transnr].counter);
                     state->mark = XML_REGEXP_MARK_NORMAL;
@@ -1932,12 +2099,13 @@ xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
                 (range2->type == XML_REGEXP_EPSILON)) {
         return(0);
      } else if (range1->type == range2->type) {
-        if ((range1->type != XML_REGEXP_CHARVAL) ||
-           (range1->end < range2->start) ||
-           (range2->end < range1->start))
-           ret = 1;
-       else
+        if (range1->type != XML_REGEXP_CHARVAL)
+            ret = 1;
+        else if ((range1->end < range2->start) ||
+                (range2->end < range1->start))
             ret = 0;
+       else
+           ret = 1;
      } else if (range1->type == XML_REGEXP_CHARVAL) {
          int codepoint;
         int neg = 0;
@@ -1945,7 +2113,7 @@ xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
         /*
          * just check all codepoints in the range for acceptance,
          * this is usually way cheaper since done only once at
-        * compilation than testing over and over at runtime or 
+        * compilation than testing over and over at runtime or
          * pushing too many states when evaluating.
          */
         if (((range1->neg == 0) && (range2->neg != 0)) ||
@@ -2064,7 +2232,7 @@ xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
      if (((range1->neg == 0) && (range2->neg != 0)) ||
          ((range1->neg != 0) && (range2->neg == 0)))
         ret = !ret;
-    return(1);
+    return(ret);
  }
  
  /**
@@ -2272,6 +2440,7 @@ xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
   * xmlFAEqualAtoms:
   * @atom1:  an atom
   * @atom2:  an atom
+ * @deep: if not set only compare string pointers
   *
   * Compares two atoms to check whether they are the same exactly
   * this is used to remove equivalent transitions
@@ -2279,7 +2448,7 @@ xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
   * Returns 1 if same and 0 otherwise
   */
  static int
-xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
+xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
      int ret = 0;
  
      if (atom1 == atom2)
@@ -2294,8 +2463,11 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
             ret = 0;
             break;
          case XML_REGEXP_STRING:
-           ret = xmlStrEqual((xmlChar *)atom1->valuep,
-                             (xmlChar *)atom2->valuep);
+            if (!deep)
+                ret = (atom1->valuep == atom2->valuep);
+            else
+                ret = xmlStrEqual((xmlChar *)atom1->valuep,
+                                  (xmlChar *)atom2->valuep);
             break;
          case XML_REGEXP_CHARVAL:
             ret = (atom1->codepoint == atom2->codepoint);
@@ -2313,6 +2485,7 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
   * xmlFACompareAtoms:
   * @atom1:  an atom
   * @atom2:  an atom
+ * @deep: if not set only compare string pointers
   *
   * Compares two atoms to check whether they intersect in some ways,
   * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
@@ -2320,7 +2493,7 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
   * Returns 1 if yes and 0 otherwise
   */
  static int
-xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
+xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
      int ret = 1;
  
      if (atom1 == atom2)
@@ -2346,8 +2519,11 @@ xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
      }
      switch (atom1->type) {
          case XML_REGEXP_STRING:
-           ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
-                                        (xmlChar *)atom2->valuep);
+            if (!deep)
+                ret = (atom1->valuep != atom2->valuep);
+            else
+                ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
+                                             (xmlChar *)atom2->valuep);
             break;
          case XML_REGEXP_EPSILON:
             goto not_determinist;
@@ -2410,9 +2586,16 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
      int res;
      int transnr, nbTrans;
      xmlRegTransPtr t1;
+    int deep = 1;
  
      if (state == NULL)
         return(ret);
+    if (state->markd == XML_REGEXP_MARK_VISITED)
+       return(ret);
+
+    if (ctxt->flags & AM_AUTOMATA_RNG)
+        deep = 0;
+
      /*
       * don't recurse on transitions potentially added in the course of
       * the elimination.
@@ -2424,10 +2607,12 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
          * check transitions conflicting with the one looked at
          */
         if (t1->atom == NULL) {
-           if (t1->to == -1)
+           if (t1->to < 0)
                 continue;
+           state->markd = XML_REGEXP_MARK_VISITED;
             res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
                                            to, atom);
+           state->markd = 0;
             if (res == 0) {
                 ret = 0;
                 /* t1->nd = 1; */
@@ -2436,7 +2621,7 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
         }
         if (t1->to != to)
             continue;
-       if (xmlFACompareAtoms(t1->atom, atom)) {
+       if (xmlFACompareAtoms(t1->atom, atom, deep)) {
             ret = 0;
             /* mark the transition as non-deterministic */
             t1->nd = 1;
@@ -2460,6 +2645,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
      xmlRegTransPtr t1, t2, last;
      int i;
      int ret = 1;
+    int deep = 1;
  
  #ifdef DEBUG_REGEXP_GRAPH
      printf("xmlFAComputesDeterminism\n");
@@ -2468,6 +2654,9 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
      if (ctxt->determinist != -1)
         return(ctxt->determinist);
  
+    if (ctxt->flags & AM_AUTOMATA_RNG)
+        deep = 0;
+
      /*
       * First cleanup the automata removing cancelled transitions
       */
@@ -2495,7 +2684,13 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
                     continue;
                 if (t2->atom != NULL) {
                     if (t1->to == t2->to) {
-                       if (xmlFAEqualAtoms(t1->atom, t2->atom))
+                        /*
+                         * Here we use deep because we want to keep the
+                         * transitions which indicate a conflict
+                         */
+                       if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
+                            (t1->counter == t2->counter) &&
+                            (t1->count == t2->count))
                             t2->to = -1; /* eliminated */
                     }
                 }
@@ -2530,8 +2725,11 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
                 if (t2->to == -1) /* eliminated */
                     continue;
                 if (t2->atom != NULL) {
-                   /* not determinist ! */
-                   if (xmlFACompareAtoms(t1->atom, t2->atom)) {
+                    /*
+                     * But here we don't use deep because we want to
+                     * find transitions which indicate a conflict
+                     */
+                   if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
                         ret = 0;
                         /* mark the transitions as non-deterministic ones */
                         t1->nd = 1;
@@ -2583,9 +2781,9 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
  }
  
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *     Routines to check input against transition atoms                *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  
  static int
@@ -2614,7 +2812,7 @@ xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
          case XML_REGEXP_NOTINITNAME:
             neg = !neg;
          case XML_REGEXP_INITNAME:
-           ret = (IS_LETTER(codepoint) || 
+           ret = (IS_LETTER(codepoint) ||
                    (codepoint == '_') || (codepoint == ':'));
             break;
          case XML_REGEXP_NOTNAMECHAR:
@@ -2862,9 +3060,9 @@ xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
  }
  
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *     Saving and restoring state of an execution context              *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  
  #ifdef DEBUG_REGEXP_EXEC
@@ -2875,7 +3073,8 @@ xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
         int i;
         printf(": ");
         for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
-           printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
+           printf("%s ", (const char *)
+                  exec->inputStack[exec->inputStackNr - (i + 1)].value);
      } else {
         printf(": %s", &(exec->inputString[exec->index]));
      }
@@ -2963,8 +3162,10 @@ xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
             exec->status = -6;
             return;
         }
-       memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
+       if (exec->counts) {
+           memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
                exec->comp->nbCounters * sizeof(int));
+       }
      }
  
  #ifdef DEBUG_REGEXP_EXEC
@@ -2974,9 +3175,9 @@ xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
  }
  
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *     Verifier, running an input against a compiled regexp            *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  
  static int
@@ -3008,9 +3209,10 @@ xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
          memset(exec->counts, 0, comp->nbCounters * sizeof(int));
      } else
         exec->counts = NULL;
-    while ((exec->status == 0) &&
+    while ((exec->status == 0) && (exec->state != NULL) &&
            ((exec->inputString[exec->index] != 0) ||
-           (exec->state->type != XML_REGEXP_FINAL_STATE))) {
+           ((exec->state != NULL) &&
+            (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
         xmlRegTransPtr trans;
         xmlRegAtomPtr atom;
  
@@ -3081,12 +3283,22 @@ xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
                      * this is a multiple input sequence
                      * If there is a counter associated increment it now.
                      * before potentially saving and rollback
+                    * do not increment if the counter is already over the
+                    * maximum limit in which case get to next transition
                      */
                     if (trans->counter >= 0) {
-                       if (exec->counts == NULL) {
+                       xmlRegCounterPtr counter;
+
+                       if ((exec->counts == NULL) ||
+                           (exec->comp == NULL) ||
+                           (exec->comp->counters == NULL)) {
                             exec->status = -1;
                             goto error;
                         }
+                       counter = &exec->comp->counters[trans->counter];
+                       if (exec->counts[trans->counter] >= counter->max)
+                           continue; /* for loop on transitions */
+
  #ifdef DEBUG_REGEXP_EXEC
                         printf("Increasing count %d\n", trans->counter);
  #endif
@@ -3182,10 +3394,18 @@ xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
                     xmlFARegExecSave(exec);
                 }
                 if (trans->counter >= 0) {
-                   if (exec->counts == NULL) {
-                       exec->status = -1;
+                   xmlRegCounterPtr counter;
+
+                    /* make sure we don't go over the counter maximum value */
+                   if ((exec->counts == NULL) ||
+                       (exec->comp == NULL) ||
+                       (exec->comp->counters == NULL)) {
+                       exec->status = -1;
                         goto error;
                     }
+                   counter = &exec->comp->counters[trans->counter];
+                   if (exec->counts[trans->counter] >= counter->max)
+                       continue; /* for loop on transitions */
  #ifdef DEBUG_REGEXP_EXEC
                     printf("Increasing count %d\n", trans->counter);
  #endif
@@ -3243,6 +3463,8 @@ error:
         }
         xmlFree(exec->rollbacks);
      }
+    if (exec->state == NULL)
+        return(-1);
      if (exec->counts != NULL)
         xmlFree(exec->counts);
      if (exec->status == 0)
@@ -3256,9 +3478,9 @@ error:
  }
  
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *     Progressive interface to the verifier one atom at a time        *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  #ifdef DEBUG_ERR
  static void testerr(xmlRegExecCtxtPtr exec);
@@ -3375,7 +3597,7 @@ xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
  #endif
      if (exec->inputStackMax == 0) {
         exec->inputStackMax = 4;
-       exec->inputStack = (xmlRegInputTokenPtr) 
+       exec->inputStack = (xmlRegInputTokenPtr)
             xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
         if (exec->inputStack == NULL) {
             xmlRegexpErrMemory(NULL, "pushing input string");
@@ -3404,11 +3626,11 @@ xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
  
  /**
   * xmlRegStrEqualWildcard:
- * @expStr:  the string to be evaluated 
+ * @expStr:  the string to be evaluated
   * @valStr:  the validation string
   *
   * Checks if both strings are equal or have the same content. "*"
- * can be used as a wildcard in @valStr; "|" is used as a seperator of 
+ * can be used as a wildcard in @valStr; "|" is used as a seperator of
   * substrings in both @expStr and @valStr.
   *
   * Returns 1 if the comparison is satisfied and the number of substrings
@@ -3474,7 +3696,7 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
  
      if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
         return(-1);
-    
+
      if (value == NULL) {
         /*
          * are we at a final state ?
@@ -3495,9 +3717,9 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
      for (i = 0;i < comp->nbstrings;i++) {
         target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
         if ((target > 0) && (target <= comp->nbstates)) {
-           target--; /* to avoid 0 */    
+           target--; /* to avoid 0 */
             if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
-               exec->index = target;           
+               exec->index = target;
                 if ((exec->callback != NULL) && (comp->transdata != NULL)) {
                     exec->callback(exec->data, value,
                           comp->transdata[state * comp->nbstrings + i], data);
@@ -3631,7 +3853,7 @@ xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
                             continue;
                         counter = &exec->comp->counters[t->counter];
                         count = exec->counts[t->counter];
-                       if ((count < counter->max) && 
+                       if ((count < counter->max) &&
                             (t->atom != NULL) &&
                             (xmlStrEqual(value, t->atom->valuep))) {
                             ret = 0;
@@ -3871,7 +4093,7 @@ rollback:
              */
             exec->determinist = 0;
             xmlFARegExecRollBack(exec);
-           if (exec->status == 0) {
+           if ((exec->inputStack != NULL ) && (exec->status == 0)) {
                 value = exec->inputStack[exec->index].value;
                 data = exec->inputStack[exec->index].data;
  #ifdef DEBUG_PUSH
@@ -3989,7 +4211,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
      int maxval;
      int nb = 0;
  
-    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 
+    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
          (values == NULL) || (*nbval <= 0))
          return(-1);
  
@@ -4086,7 +4308,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
                     (*nbval)++;
                 }
             } else {
-                if ((exec->comp->states[trans->to] != NULL) &&
+                if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NULL) &&
                     (exec->comp->states[trans->to]->type !=
                      XML_REGEXP_SINK_STATE)) {
                     if (atom->neg)
@@ -4095,7 +4317,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
                         values[nb++] = (xmlChar *) atom->valuep;
                     (*nbval)++;
                 }
-           } 
+           }
         }
         for (transno = 0;
              (transno < state->nbTrans) && (nb < maxval);
@@ -4122,7 +4344,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
                         values[nb++] = (xmlChar *) atom->valuep;
                     (*nbneg)++;
                 }
-           } 
+           }
         }
      }
      return(0);
@@ -4353,10 +4575,10 @@ progress:
  }
  #endif
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *     Parser for the Schemas Datatype Regular Expressions             *
   *     http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs      *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  
  /**
@@ -4385,7 +4607,7 @@ xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
   *
   * [27]   charProp   ::=   IsCategory | IsBlock
   * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
- *                       Separators | Symbols | Others 
+ *                       Separators | Symbols | Others
   * [29]   Letters   ::=   'L' [ultmo]?
   * [30]   Marks   ::=   'M' [nce]?
   * [31]   Numbers   ::=   'N' [dlo]?
@@ -4400,7 +4622,7 @@ xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
      int cur;
      xmlRegAtomType type = (xmlRegAtomType) 0;
      xmlChar *blockName = NULL;
-    
+
      cur = CUR;
      if (cur == 'L') {
         NEXT;
@@ -4572,15 +4794,15 @@ xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
         NEXT;
         start = ctxt->cur;
         cur = CUR;
-       if (((cur >= 'a') && (cur <= 'z')) || 
-           ((cur >= 'A') && (cur <= 'Z')) || 
-           ((cur >= '0') && (cur <= '9')) || 
+       if (((cur >= 'a') && (cur <= 'z')) ||
+           ((cur >= 'A') && (cur <= 'Z')) ||
+           ((cur >= '0') && (cur <= '9')) ||
             (cur == 0x2D)) {
             NEXT;
             cur = CUR;
-           while (((cur >= 'a') && (cur <= 'z')) || 
-               ((cur >= 'A') && (cur <= 'Z')) || 
-               ((cur >= '0') && (cur <= '9')) || 
+           while (((cur >= 'a') && (cur <= 'z')) ||
+               ((cur >= 'A') && (cur <= 'Z')) ||
+               ((cur >= '0') && (cur <= '9')) ||
                 (cur == 0x2D)) {
                 NEXT;
                 cur = CUR;
@@ -4606,7 +4828,7 @@ xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
   * xmlFAParseCharClassEsc:
   * @ctxt:  a regexp parser context
   *
- * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) 
+ * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
   * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
   * [25] catEsc   ::=   '\p{' charProp '}'
   * [26] complEsc ::=   '\P{' charProp '}'
@@ -4682,6 +4904,17 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
                 }
             }
         } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
+            switch (cur) {
+                case 'n':
+                    cur = '\n';
+                    break;
+                case 'r':
+                    cur = '\r';
+                    break;
+                case 't':
+                    cur = '\t';
+                    break;
+            }
             xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
                                XML_REGEXP_CHARVAL, cur, cur, NULL);
         }
@@ -4692,34 +4925,34 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
         xmlRegAtomType type = XML_REGEXP_ANYSPACE;
  
         switch (cur) {
-           case 's': 
+           case 's':
                 type = XML_REGEXP_ANYSPACE;
                 break;
-           case 'S': 
+           case 'S':
                 type = XML_REGEXP_NOTSPACE;
                 break;
-           case 'i': 
+           case 'i':
                 type = XML_REGEXP_INITNAME;
                 break;
-           case 'I': 
+           case 'I':
                 type = XML_REGEXP_NOTINITNAME;
                 break;
-           case 'c': 
+           case 'c':
                 type = XML_REGEXP_NAMECHAR;
                 break;
-           case 'C': 
+           case 'C':
                 type = XML_REGEXP_NOTNAMECHAR;
                 break;
-           case 'd': 
+           case 'd':
                 type = XML_REGEXP_DECIMAL;
                 break;
-           case 'D': 
+           case 'D':
                 type = XML_REGEXP_NOTDECIMAL;
                 break;
-           case 'w': 
+           case 'w':
                 type = XML_REGEXP_REALCHAR;
                 break;
-           case 'W': 
+           case 'W':
                 type = XML_REGEXP_NOTREALCHAR;
                 break;
         }
@@ -4730,72 +4963,16 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
             xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
                                type, 0, 0, NULL);
         }
-    }
-}
-
-/**
- * xmlFAParseCharRef:
- * @ctxt:  a regexp parser context
- *
- * [19]   XmlCharRef   ::=   ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' )
- */
-static int
-xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
-    int ret = 0, cur;
-
-    if ((CUR != '&') || (NXT(1) != '#'))
-       return(-1);
-    NEXT;
-    NEXT;
-    cur = CUR;
-    if (cur == 'x') {
-       NEXT;
-       cur = CUR;
-       if (((cur >= '0') && (cur <= '9')) ||
-           ((cur >= 'a') && (cur <= 'f')) ||
-           ((cur >= 'A') && (cur <= 'F'))) {
-           while (((cur >= '0') && (cur <= '9')) ||
-                  ((cur >= 'a') && (cur <= 'f')) ||
-                  ((cur >= 'A') && (cur <= 'F'))) {
-               if ((cur >= '0') && (cur <= '9'))
-                   ret = ret * 16 + cur - '0';
-               else if ((cur >= 'a') && (cur <= 'f'))
-                   ret = ret * 16 + 10 + (cur - 'a');
-               else
-                   ret = ret * 16 + 10 + (cur - 'A');
-               NEXT;
-               cur = CUR;
-           }
-       } else {
-           ERROR("Char ref: expecting [0-9A-F]");
-           return(-1);
-       }
-    } else {
-       if ((cur >= '0') && (cur <= '9')) {
-           while ((cur >= '0') && (cur <= '9')) {
-               ret = ret * 10 + cur - '0';
-               NEXT;
-               cur = CUR;
-           }
-       } else {
-           ERROR("Char ref: expecting [0-9]");
-           return(-1);
-       }
-    }
-    if (cur != ';') {
-       ERROR("Char ref: expecting ';'");
-       return(-1);
      } else {
-       NEXT;
+       ERROR("Wrong escape sequence, misuse of character '\\'");
      }
-    return(ret);
  }
  
  /**
   * xmlFAParseCharRange:
   * @ctxt:  a regexp parser context
   *
- * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash 
+ * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
   * [18]   seRange   ::=   charOrEsc '-' charOrEsc
   * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
   * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
@@ -4812,12 +4989,6 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
         return;
      }
  
-    if ((CUR == '&') && (NXT(1) == '#')) {
-       end = start = xmlFAParseCharRef(ctxt);
-        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
-                          XML_REGEXP_CHARVAL, start, end, NULL);
-       return;
-    }
      cur = CUR;
      if (cur == '\\') {
         NEXT;
@@ -4842,10 +5013,15 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
         ERROR("Expecting a char range");
         return;
      }
-    NEXTL(len);
-    if (start == '-') {
+    /*
+     * Since we are "inside" a range, we can assume ctxt->cur is past
+     * the start of ctxt->string, and PREV should be safe
+     */
+    if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
+       NEXTL(len);
         return;
      }
+    NEXTL(len);
      cur = CUR;
      if ((cur != '-') || (NXT(1) == ']')) {
          xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
@@ -4896,7 +5072,7 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
  static void
  xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
      do {
-       if ((CUR == '\\') || (CUR == '.')) {
+       if (CUR == '\\') {
             xmlFAParseCharClassEsc(ctxt);
         } else {
             xmlFAParseCharRange(ctxt);
@@ -4911,7 +5087,7 @@ xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
   *
   * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
   * [15]   negCharGroup ::= '^' posCharGroup
- * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr  
+ * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
   * [12]   charClassExpr ::= '[' charGroup ']'
   */
  static void
@@ -5085,9 +5261,15 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
      } else if (CUR == ')') {
         return(0);
      } else if (CUR == '(') {
-       xmlRegStatePtr start, oldend;
+       xmlRegStatePtr start, oldend, start0;
  
         NEXT;
+       /*
+        * this extra Epsilon transition is needed if we count with 0 allowed
+        * unfortunately this can't be known at that point
+        */
+       xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
+       start0 = ctxt->state;
         xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
         start = ctxt->state;
         oldend = ctxt->end;
@@ -5103,6 +5285,7 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
         if (ctxt->atom == NULL)
             return(-1);
         ctxt->atom->start = start;
+       ctxt->atom->start0 = start0;
         ctxt->atom->stop = ctxt->state;
         ctxt->end = oldend;
         return(1);
@@ -5152,7 +5335,7 @@ xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
      previous = ctxt->state;
      ret = xmlFAParsePiece(ctxt);
      if (ret != 0) {
-       if (xmlFAGenerateTransitions(ctxt, previous, 
+       if (xmlFAGenerateTransitions(ctxt, previous,
                 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
             return(-1);
         previous = ctxt->state;
@@ -5161,7 +5344,7 @@ xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
      while ((ret != 0) && (ctxt->error == 0)) {
         ret = xmlFAParsePiece(ctxt);
         if (ret != 0) {
-           if (xmlFAGenerateTransitions(ctxt, previous, 
+           if (xmlFAGenerateTransitions(ctxt, previous,
                     (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
                     return(-1);
             previous = ctxt->state;
@@ -5199,6 +5382,10 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
      end = ctxt->state;
      while ((CUR == '|') && (ctxt->error == 0)) {
         NEXT;
+       if (CUR == 0) {
+           ERROR("expecting a branch after |")
+           return;
+       }
         ctxt->state = start;
         ctxt->end = NULL;
         xmlFAParseBranch(ctxt, end);
@@ -5210,9 +5397,9 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
  }
  
  /************************************************************************
- *                                                                     *
- *                     The basic API                                   *
- *                                                                     *
+ *                                                                     *
+ *                     The basic API                                   *
+ *                                                                     *
   ************************************************************************/
  
  /**
@@ -5281,6 +5468,10 @@ xmlRegexpCompile(const xmlChar *regexp) {
      if (CUR != 0) {
         ERROR("xmlFAParseRegExp: extra characters");
      }
+    if (ctxt->error != 0) {
+       xmlRegFreeParserCtxt(ctxt);
+       return(NULL);
+    }
      ctxt->end = ctxt->state;
      ctxt->start->type = XML_REGEXP_START_STATE;
      ctxt->end->type = XML_REGEXP_FINAL_STATE;
@@ -5345,10 +5536,12 @@ xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
      am->nbStates = comp->nbStates;
      am->states = comp->states;
      am->determinist = -1;
+    am->flags = comp->flags;
      ret = xmlFAComputesDeterminism(am);
      am->atoms = NULL;
      am->states = NULL;
      xmlFreeAutomata(am);
+    comp->determinist = ret;
      return(ret);
  }
  
@@ -5393,9 +5586,9 @@ xmlRegFreeRegexp(xmlRegexpPtr regexp) {
  
  #ifdef LIBXML_AUTOMATA_ENABLED
  /************************************************************************
- *                                                                     *
- *                     The Automata interface                          *
- *                                                                     *
+ *                                                                     *
+ *                     The Automata interface                          *
+ *                                                                     *
   ************************************************************************/
  
  /**
@@ -5426,6 +5619,7 @@ xmlNewAutomata(void) {
         xmlFreeAutomata(ctxt);
         return(NULL);
      }
+    ctxt->flags = 0;
  
      return(ctxt);
  }
@@ -5444,6 +5638,20 @@ xmlFreeAutomata(xmlAutomataPtr am) {
  }
  
  /**
+ * xmlAutomataSetFlags:
+ * @am: an automata
+ * @flags:  a set of internal flags
+ *
+ * Set some flags on the automata
+ */
+void
+xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
+    if (am == NULL)
+       return;
+    am->flags |= flags;
+}
+
+/**
   * xmlAutomataGetInitState:
   * @am: an automata
   *
@@ -5501,8 +5709,6 @@ xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
      if (atom == NULL)
          return(NULL);
      atom->data = data;
-    if (atom == NULL)
-       return(NULL);
      atom->valuep = xmlStrdup(token);
  
      if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
@@ -5651,7 +5857,7 @@ xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
   *
   * If @to is NULL, this creates first a new target state in the automata
   * and then adds a transition from the @from state to the target state
- * activated by a succession of input of value @token and @token2 and 
+ * activated by a succession of input of value @token and @token2 and
   * whose number is between @min and @max
   *
   * Returns the target state or NULL in case of error
@@ -5805,8 +6011,8 @@ xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
   *
   * If @to is NULL, this creates first a new target state in the automata
   * and then adds a transition from the @from state to the target state
- * activated by a succession of input of value @token and @token2 and whose 
- * number is between @min and @max, moreover that transition can only be 
+ * activated by a succession of input of value @token and @token2 and whose
+ * number is between @min and @max, moreover that transition can only be
   * crossed once.
   *
   * Returns the target state or NULL in case of error
@@ -5848,7 +6054,7 @@ xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
         str[lenn + lenp + 1] = 0;
  
         atom->valuep = str;
-    }    
+    }
      atom->data = data;
      atom->quant = XML_REGEXP_QUANT_ONCEONLY;
      atom->min = min;
@@ -5871,7 +6077,7 @@ xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
      return(to);
  }
  
-    
+
  
  /**
   * xmlAutomataNewOnceTrans:
@@ -5940,7 +6146,7 @@ xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
   */
  xmlAutomataStatePtr
  xmlAutomataNewState(xmlAutomataPtr am) {
-    xmlAutomataStatePtr to; 
+    xmlAutomataStatePtr to;
  
      if (am == NULL)
         return(NULL);
@@ -6007,7 +6213,7 @@ xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
   *
   * Returns the counter number or -1 in case of error
   */
-int            
+int
  xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
      int ret;
  
@@ -6079,7 +6285,7 @@ xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
   *
   * Returns the compiled regexp or NULL in case of error
   */
-xmlRegexpPtr          
+xmlRegexpPtr
  xmlAutomataCompile(xmlAutomataPtr am) {
      xmlRegexpPtr ret;
  
@@ -6099,7 +6305,7 @@ xmlAutomataCompile(xmlAutomataPtr am) {
   *
   * Returns 1 if true, 0 if not, and -1 in case of error
   */
-int          
+int
  xmlAutomataIsDeterminist(xmlAutomataPtr am) {
      int ret;
  
@@ -6129,6 +6335,7 @@ struct _xmlExpCtxt {
      int size;
      int nbElems;
      int nb_nodes;
+    int maxNodes;
      const char *expr;
      const char *cur;
      int nb_cons;
@@ -6151,13 +6358,14 @@ xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
  
      if (maxNodes <= 4096)
          maxNodes = 4096;
-    
+
      ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
      if (ret == NULL)
          return(NULL);
      memset(ret, 0, sizeof(xmlExpCtxt));
      ret->size = size;
      ret->nbElems = 0;
+    ret->maxNodes = maxNodes;
      ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
      if (ret->table == NULL) {
          xmlFree(ret);
@@ -6204,7 +6412,7 @@ xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
  /* #define DEBUG_DERIV */
  
  /*
- * TODO: 
+ * TODO:
   * - Wildcards
   * - public API for creation
   *
@@ -6272,7 +6480,7 @@ static unsigned short
  xmlExpHashNameComputeKey(const xmlChar *name) {
      unsigned short value = 0L;
      char ch;
-    
+
      if (name != NULL) {
         value += 30 * (*name);
         while ((ch = *name++) != 0) {
@@ -6291,7 +6499,7 @@ xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
                       xmlExpNodePtr right) {
      unsigned long value;
      unsigned short ret;
-    
+
      switch (type) {
          case XML_EXP_SEQ:
             value = left->key;
@@ -6432,7 +6640,7 @@ xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
             left->exp_left->ref++;
             tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
                                      NULL, 0, 0);
-       
+
             xmlExpFree(ctxt, left);
             return(tmp);
         }
@@ -6489,7 +6697,7 @@ xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
             return(right);
         }
         kbase = xmlExpHashComputeKey(type, left, right);
-    } else 
+    } else
          return(NULL);
  
      key = kbase % ctxt->size;
@@ -6630,7 +6838,7 @@ xmlExpRef(xmlExpNodePtr exp) {
   * xmlExpNewAtom:
   * @ctxt: the expression context
   * @name: the atom name
- * @len: the atom name lenght in byte (or -1);
+ * @len: the atom name length in byte (or -1);
   *
   * Get the atom associated to this name from that context
   *
@@ -6730,7 +6938,7 @@ xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
   ************************************************************************/
  
  static int
-xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                       const xmlChar**list, int len, int nb) {
      int tmp, tmp2;
  tail:
@@ -6743,7 +6951,7 @@ tail:
                     return(0);
              if (nb >= len)
                 return(-2);
-           list[nb++] = exp->exp_str;
+           list[nb] = exp->exp_str;
             return(1);
          case XML_EXP_COUNT:
             exp = exp->exp_left;
@@ -6767,7 +6975,7 @@ tail:
   * @ctxt: the expression context
   * @exp: the expression
   * @langList: where to store the tokens
- * @len: the allocated lenght of @list
+ * @len: the allocated length of @list
   *
   * Find all the strings used in @exp and store them in @list
   *
@@ -6775,7 +6983,7 @@ tail:
   *         -2 if there is more than @len strings
   */
  int
-xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                    const xmlChar**langList, int len) {
      if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
          return(-1);
@@ -6783,7 +6991,7 @@ xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
  }
  
  static int
-xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                    const xmlChar**list, int len, int nb) {
      int tmp, tmp2;
  tail:
@@ -6798,7 +7006,7 @@ tail:
                     return(0);
              if (nb >= len)
                 return(-2);
-           list[nb++] = exp->exp_str;
+           list[nb] = exp->exp_str;
             return(1);
          case XML_EXP_COUNT:
             exp = exp->exp_left;
@@ -6833,7 +7041,7 @@ tail:
   * @ctxt: the expression context
   * @exp: the expression
   * @tokList: where to store the tokens
- * @len: the allocated lenght of @list
+ * @len: the allocated length of @list
   *
   * Find all the strings that appears at the start of the languages
   * accepted by @exp and store them in @list. E.g. for (a, b) | c
@@ -6843,7 +7051,7 @@ tail:
   *         -2 if there is more than @len strings
   */
  int
-xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                 const xmlChar**tokList, int len) {
      if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
          return(-1);
@@ -7540,7 +7748,7 @@ xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
      xmlFree((xmlChar **) tab);
      return(ret);
  }
-    
+
  /**
   * xmlExpExpDerive:
   * @ctxt: the expressions context
@@ -7592,7 +7800,7 @@ xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
  int
  xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
      xmlExpNodePtr tmp;
-    
+
      if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
          return(-1);
  
@@ -7636,7 +7844,7 @@ xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
  
  /************************************************************************
   *                                                                     *
- *                     Parsing expression                              *
+ *                     Parsing expression                              *
   *                                                                     *
   ************************************************************************/
  
@@ -7740,7 +7948,7 @@ parse_quantifier:
         ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
                                  0, -1);
         SKIP_BLANKS
-    } 
+    }
      return(ret);
  }
  
@@ -7862,7 +8070,7 @@ xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
              break;
          case XML_EXP_COUNT: {
             char rep[40];
-           
+
             c = expr->exp_left;
             if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
                 xmlExpDumpInt(buf, c, 1);