Merge branch 'tizen_base' of ssh://review.tizen.org:29418/platform/upstream/libxml2...

[platform/upstream/libxml2.git] / xmlregexp.c
diff --git a/xmlregexp.c b/xmlregexp.c

index 8a8be98..ca3b4f4 100644 (file)
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -1,7 +1,7 @@
  /*
   * regexp.c: generic and extensible Regular Expression engine
   *
- * Basically designed with the purpose of compiling regexps for 
+ * Basically designed with the purpose of compiling regexps for
   * the variety of validation/shemas mechanisms now available in
   * XML related specifications these include:
   *    - XML-1.0 DTD validation
@@ -68,15 +68,15 @@
   *
   * macro to flag unimplemented blocks
   */
-#define TODO                                                           \
+#define TODO                                                           \
      xmlGenericError(xmlGenericErrorContext,                            \
             "Unimplemented block at %s:%d\n",                           \
              __FILE__, __LINE__);
  
  /************************************************************************
- *                                                                     *
- *                     Datatypes and structures                        *
- *                                                                     *
+ *                                                                     *
+ *                     Datatypes and structures                        *
+ *                                                                     *
   ************************************************************************/
  
  /*
@@ -222,6 +222,7 @@ struct _xmlRegTrans {
  struct _xmlAutomataState {
      xmlRegStateType type;
      xmlRegMarkedType mark;
+    xmlRegMarkedType markd;
      xmlRegMarkedType reached;
      int no;
      int maxTrans;
@@ -364,7 +365,7 @@ void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
  
  /************************************************************************
   *                                                                     *
- *             Regexp memory error handler                             *
+ *             Regexp memory error handler                             *
   *                                                                     *
   ************************************************************************/
  /**
@@ -411,9 +412,9 @@ xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
  }
  
  /************************************************************************
- *                                                                     *
- *                     Allocation/Deallocation                         *
- *                                                                     *
+ *                                                                     *
+ *                     Allocation/Deallocation                         *
+ *                                                                     *
   ************************************************************************/
  
  static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
@@ -934,9 +935,9 @@ xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
  }
  
  /************************************************************************
- *                                                                     *
- *                     Display of Data structures                      *
- *                                                                     *
+ *                                                                     *
+ *                     Display of Data structures                      *
+ *                                                                     *
   ************************************************************************/
  
  static void
@@ -1143,7 +1144,7 @@ xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
         fprintf(output, "char %c ", trans->atom->codepoint);
      fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
  }
-    
+
  static void
  xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
      int i;
@@ -1157,7 +1158,7 @@ xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
         fprintf(output, "START ");
      if (state->type == XML_REGEXP_FINAL_STATE)
         fprintf(output, "FINAL ");
-    
+
      fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
      for (i = 0;i < state->nbTrans; i++) {
         xmlRegPrintTrans(output, &(state->trans[i]));
@@ -1207,12 +1208,12 @@ xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
  #endif
  
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *              Finite Automata structures manipulations               *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  
-static void 
+static void
  xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
                    int neg, xmlRegAtomType type, int start, int end,
                    xmlChar *blockName) {
@@ -1252,7 +1253,7 @@ xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
         return;
      range->blockName = blockName;
      atom->ranges[atom->nbRanges++] = range;
-    
+
  }
  
  static int
@@ -1283,7 +1284,7 @@ xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
      return(ctxt->nbCounters++);
  }
  
-static int 
+static int
  xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
      if (atom == NULL) {
         ERROR("atom push: atom is NULL");
@@ -1315,7 +1316,7 @@ xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
      return(0);
  }
  
-static void 
+static void
  xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
                        int from) {
      if (target->maxTransTo == 0) {
@@ -1343,7 +1344,7 @@ xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
      target->nbTransTo++;
  }
  
-static void 
+static void
  xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
                     xmlRegAtomPtr atom, xmlRegStatePtr target,
                     int counter, int count) {
@@ -1409,7 +1410,7 @@ xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
         printf("counted %d\n", counter);
      else if (atom == NULL)
         printf("epsilon transition\n");
-    else if (atom != NULL) 
+    else if (atom != NULL)
          xmlRegPrintAtom(stdout, atom);
  #endif
  
@@ -1543,6 +1544,7 @@ static int
  xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
                          xmlRegStatePtr to, xmlRegAtomPtr atom) {
      xmlRegStatePtr end;
+    int nullable = 0;
  
      if (atom == NULL) {
         ERROR("genrate transition: atom == NULL");
@@ -1563,7 +1565,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
              */
             xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
  #ifdef DV
-       } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && 
+       } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
                    (atom->quant != XML_REGEXP_QUANT_ONCE)) {
             to = xmlRegNewState(ctxt);
             xmlRegStatePush(ctxt, to);
@@ -1577,7 +1579,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
                 /*
                  * transition done to the state after end of atom.
                  *      1. set transition from atom start to new state
-                *      2. set transition from atom end to this state. 
+                *      2. set transition from atom end to this state.
                  */
                  if (to == NULL) {
                      xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
@@ -1621,7 +1623,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
                     /*
                      * duplicate a transition based on atom to count next
                      * occurences after 1. We cannot loop to atom->start
-                    * directly because we need an epsilon transition to 
+                    * directly because we need an epsilon transition to
                      * newstate.
                      */
                      /* ???? For some reason it seems we never reach that
@@ -1680,7 +1682,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
                 break;
         }
         return(0);
-    } 
+    }
      if ((atom->min == 0) && (atom->max == 0) &&
                 (atom->quant == XML_REGEXP_QUANT_RANGE)) {
          /*
@@ -1706,9 +1708,9 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
         else {
             return(-1);
         }
-    } 
+    }
      end = to;
-    if ((atom->quant == XML_REGEXP_QUANT_MULT) || 
+    if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
          (atom->quant == XML_REGEXP_QUANT_PLUS)) {
         /*
          * Do not pollute the target state by adding transitions from
@@ -1716,7 +1718,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
          * So isolate with an epsilon transition.
          */
          xmlRegStatePtr tmp;
-       
+
         tmp = xmlRegNewState(ctxt);
         if (tmp != NULL)
             xmlRegStatePush(ctxt, tmp);
@@ -1729,6 +1731,13 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
      if (xmlRegAtomPush(ctxt, atom) < 0) {
         return(-1);
      }
+    if ((atom->quant == XML_REGEXP_QUANT_RANGE) &&
+        (atom->min == 0) && (atom->max > 0)) {
+       nullable = 1;
+       atom->min = 1;
+        if (atom->max == 1)
+           atom->quant = XML_REGEXP_QUANT_OPT;
+    }
      xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
      ctxt->state = end;
      switch (atom->quant) {
@@ -1745,12 +1754,9 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
             atom->quant = XML_REGEXP_QUANT_ONCE;
             xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
             break;
-       case XML_REGEXP_QUANT_RANGE: 
-#if DV_test
-           if (atom->min == 0) {
+       case XML_REGEXP_QUANT_RANGE:
+           if (nullable)
                 xmlFAGenerateEpsilonTransition(ctxt, from, to);
-           }
-#endif
             break;
         default:
             break;
@@ -1762,7 +1768,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
   * xmlFAReduceEpsilonTransitions:
   * @ctxt:  a regexp parser context
   * @fromnr:  the from state
- * @tonr:  the to state 
+ * @tonr:  the to state
   * @counter:  should that transition be associated to a counted
   *
   */
@@ -1806,7 +1812,7 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
                     int newto = to->trans[transnr].to;
  
                     xmlRegStateAddTrans(ctxt, from, NULL,
-                                       ctxt->states[newto], 
+                                       ctxt->states[newto],
                                         -1, to->trans[transnr].count);
                 } else {
  #ifdef DEBUG_REGEXP_GRAPH
@@ -1828,11 +1834,11 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
             int newto = to->trans[transnr].to;
  
             if (to->trans[transnr].counter >= 0) {
-               xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
-                                   ctxt->states[newto], 
+               xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
+                                   ctxt->states[newto],
                                     to->trans[transnr].counter, -1);
             } else {
-               xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
+               xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
                                     ctxt->states[newto], counter, -1);
             }
         }
@@ -1844,7 +1850,7 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
   * xmlFAEliminateSimpleEpsilonTransitions:
   * @ctxt:  a regexp parser context
   *
- * Eliminating general epsilon transitions can get costly in the general 
+ * Eliminating general epsilon transitions can get costly in the general
   * algorithm due to the large amount of generated new transitions and
   * associated comparisons. However for simple epsilon transition used just
   * to separate building blocks when generating the automata this can be
@@ -1880,12 +1886,12 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
  #ifdef DEBUG_REGEXP_GRAPH
                 printf("Found simple epsilon trans from start %d to %d\n",
                        statenr, newto);
-#endif     
+#endif
              } else {
  #ifdef DEBUG_REGEXP_GRAPH
                 printf("Found simple epsilon trans from %d to %d\n",
                        statenr, newto);
-#endif     
+#endif
                 for (i = 0;i < state->nbTransTo;i++) {
                     tmp = ctxt->states[state->transTo[i]];
                     for (j = 0;j < tmp->nbTrans;j++) {
@@ -1893,10 +1899,10 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
  #ifdef DEBUG_REGEXP_GRAPH
                             printf("Changed transition %d on %d to go to %d\n",
                                    j, tmp->no, newto);
-#endif     
+#endif
                             tmp->trans[j].to = -1;
                             xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
-                                               ctxt->states[newto],
+                                               ctxt->states[newto],
                                                 tmp->trans[j].counter,
                                                 tmp->trans[j].count);
                         }
@@ -1910,7 +1916,7 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
                  state->type = XML_REGEXP_UNREACH_STATE;
  
             }
-            
+
         }
      }
  }
@@ -2112,7 +2118,7 @@ xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
         /*
          * just check all codepoints in the range for acceptance,
          * this is usually way cheaper since done only once at
-        * compilation than testing over and over at runtime or 
+        * compilation than testing over and over at runtime or
          * pushing too many states when evaluating.
          */
         if (((range1->neg == 0) && (range2->neg != 0)) ||
@@ -2589,6 +2595,8 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
  
      if (state == NULL)
         return(ret);
+    if (state->markd == XML_REGEXP_MARK_VISITED)
+       return(ret);
  
      if (ctxt->flags & AM_AUTOMATA_RNG)
          deep = 0;
@@ -2606,8 +2614,10 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
         if (t1->atom == NULL) {
             if (t1->to < 0)
                 continue;
+           state->markd = XML_REGEXP_MARK_VISITED;
             res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
                                            to, atom);
+           state->markd = 0;
             if (res == 0) {
                 ret = 0;
                 /* t1->nd = 1; */
@@ -2776,9 +2786,9 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
  }
  
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *     Routines to check input against transition atoms                *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  
  static int
@@ -2807,7 +2817,7 @@ xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
          case XML_REGEXP_NOTINITNAME:
             neg = !neg;
          case XML_REGEXP_INITNAME:
-           ret = (IS_LETTER(codepoint) || 
+           ret = (IS_LETTER(codepoint) ||
                    (codepoint == '_') || (codepoint == ':'));
             break;
          case XML_REGEXP_NOTNAMECHAR:
@@ -3055,9 +3065,9 @@ xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
  }
  
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *     Saving and restoring state of an execution context              *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  
  #ifdef DEBUG_REGEXP_EXEC
@@ -3157,8 +3167,10 @@ xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
             exec->status = -6;
             return;
         }
-       memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
+       if (exec->counts) {
+           memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
                exec->comp->nbCounters * sizeof(int));
+       }
      }
  
  #ifdef DEBUG_REGEXP_EXEC
@@ -3168,9 +3180,9 @@ xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
  }
  
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *     Verifier, running an input against a compiled regexp            *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  
  static int
@@ -3202,7 +3214,7 @@ xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
          memset(exec->counts, 0, comp->nbCounters * sizeof(int));
      } else
         exec->counts = NULL;
-    while ((exec->status == 0) &&
+    while ((exec->status == 0) && (exec->state != NULL) &&
            ((exec->inputString[exec->index] != 0) ||
             ((exec->state != NULL) &&
              (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
@@ -3456,6 +3468,8 @@ error:
         }
         xmlFree(exec->rollbacks);
      }
+    if (exec->state == NULL)
+        return(-1);
      if (exec->counts != NULL)
         xmlFree(exec->counts);
      if (exec->status == 0)
@@ -3469,9 +3483,9 @@ error:
  }
  
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *     Progressive interface to the verifier one atom at a time        *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  #ifdef DEBUG_ERR
  static void testerr(xmlRegExecCtxtPtr exec);
@@ -3588,7 +3602,7 @@ xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
  #endif
      if (exec->inputStackMax == 0) {
         exec->inputStackMax = 4;
-       exec->inputStack = (xmlRegInputTokenPtr) 
+       exec->inputStack = (xmlRegInputTokenPtr)
             xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
         if (exec->inputStack == NULL) {
             xmlRegexpErrMemory(NULL, "pushing input string");
@@ -3617,11 +3631,11 @@ xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
  
  /**
   * xmlRegStrEqualWildcard:
- * @expStr:  the string to be evaluated 
+ * @expStr:  the string to be evaluated
   * @valStr:  the validation string
   *
   * Checks if both strings are equal or have the same content. "*"
- * can be used as a wildcard in @valStr; "|" is used as a seperator of 
+ * can be used as a wildcard in @valStr; "|" is used as a seperator of
   * substrings in both @expStr and @valStr.
   *
   * Returns 1 if the comparison is satisfied and the number of substrings
@@ -3687,7 +3701,7 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
  
      if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
         return(-1);
-    
+
      if (value == NULL) {
         /*
          * are we at a final state ?
@@ -3708,9 +3722,9 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
      for (i = 0;i < comp->nbstrings;i++) {
         target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
         if ((target > 0) && (target <= comp->nbstates)) {
-           target--; /* to avoid 0 */    
+           target--; /* to avoid 0 */
             if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
-               exec->index = target;           
+               exec->index = target;
                 if ((exec->callback != NULL) && (comp->transdata != NULL)) {
                     exec->callback(exec->data, value,
                           comp->transdata[state * comp->nbstrings + i], data);
@@ -3844,7 +3858,7 @@ xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
                             continue;
                         counter = &exec->comp->counters[t->counter];
                         count = exec->counts[t->counter];
-                       if ((count < counter->max) && 
+                       if ((count < counter->max) &&
                             (t->atom != NULL) &&
                             (xmlStrEqual(value, t->atom->valuep))) {
                             ret = 0;
@@ -4084,7 +4098,7 @@ rollback:
              */
             exec->determinist = 0;
             xmlFARegExecRollBack(exec);
-           if (exec->status == 0) {
+           if ((exec->inputStack != NULL ) && (exec->status == 0)) {
                 value = exec->inputStack[exec->index].value;
                 data = exec->inputStack[exec->index].data;
  #ifdef DEBUG_PUSH
@@ -4202,7 +4216,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
      int maxval;
      int nb = 0;
  
-    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 
+    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
          (values == NULL) || (*nbval <= 0))
          return(-1);
  
@@ -4299,7 +4313,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
                     (*nbval)++;
                 }
             } else {
-                if ((exec->comp->states[trans->to] != NULL) &&
+                if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NULL) &&
                     (exec->comp->states[trans->to]->type !=
                      XML_REGEXP_SINK_STATE)) {
                     if (atom->neg)
@@ -4308,7 +4322,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
                         values[nb++] = (xmlChar *) atom->valuep;
                     (*nbval)++;
                 }
-           } 
+           }
         }
         for (transno = 0;
              (transno < state->nbTrans) && (nb < maxval);
@@ -4335,7 +4349,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
                         values[nb++] = (xmlChar *) atom->valuep;
                     (*nbneg)++;
                 }
-           } 
+           }
         }
      }
      return(0);
@@ -4566,10 +4580,10 @@ progress:
  }
  #endif
  /************************************************************************
- *                                                                     *
+ *                                                                     *
   *     Parser for the Schemas Datatype Regular Expressions             *
   *     http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs      *
- *                                                                     *
+ *                                                                     *
   ************************************************************************/
  
  /**
@@ -4598,7 +4612,7 @@ xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
   *
   * [27]   charProp   ::=   IsCategory | IsBlock
   * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
- *                       Separators | Symbols | Others 
+ *                       Separators | Symbols | Others
   * [29]   Letters   ::=   'L' [ultmo]?
   * [30]   Marks   ::=   'M' [nce]?
   * [31]   Numbers   ::=   'N' [dlo]?
@@ -4613,7 +4627,7 @@ xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
      int cur;
      xmlRegAtomType type = (xmlRegAtomType) 0;
      xmlChar *blockName = NULL;
-    
+
      cur = CUR;
      if (cur == 'L') {
         NEXT;
@@ -4785,15 +4799,15 @@ xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
         NEXT;
         start = ctxt->cur;
         cur = CUR;
-       if (((cur >= 'a') && (cur <= 'z')) || 
-           ((cur >= 'A') && (cur <= 'Z')) || 
-           ((cur >= '0') && (cur <= '9')) || 
+       if (((cur >= 'a') && (cur <= 'z')) ||
+           ((cur >= 'A') && (cur <= 'Z')) ||
+           ((cur >= '0') && (cur <= '9')) ||
             (cur == 0x2D)) {
             NEXT;
             cur = CUR;
-           while (((cur >= 'a') && (cur <= 'z')) || 
-               ((cur >= 'A') && (cur <= 'Z')) || 
-               ((cur >= '0') && (cur <= '9')) || 
+           while (((cur >= 'a') && (cur <= 'z')) ||
+               ((cur >= 'A') && (cur <= 'Z')) ||
+               ((cur >= '0') && (cur <= '9')) ||
                 (cur == 0x2D)) {
                 NEXT;
                 cur = CUR;
@@ -4819,7 +4833,7 @@ xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
   * xmlFAParseCharClassEsc:
   * @ctxt:  a regexp parser context
   *
- * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) 
+ * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
   * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
   * [25] catEsc   ::=   '\p{' charProp '}'
   * [26] complEsc ::=   '\P{' charProp '}'
@@ -4916,34 +4930,34 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
         xmlRegAtomType type = XML_REGEXP_ANYSPACE;
  
         switch (cur) {
-           case 's': 
+           case 's':
                 type = XML_REGEXP_ANYSPACE;
                 break;
-           case 'S': 
+           case 'S':
                 type = XML_REGEXP_NOTSPACE;
                 break;
-           case 'i': 
+           case 'i':
                 type = XML_REGEXP_INITNAME;
                 break;
-           case 'I': 
+           case 'I':
                 type = XML_REGEXP_NOTINITNAME;
                 break;
-           case 'c': 
+           case 'c':
                 type = XML_REGEXP_NAMECHAR;
                 break;
-           case 'C': 
+           case 'C':
                 type = XML_REGEXP_NOTNAMECHAR;
                 break;
-           case 'd': 
+           case 'd':
                 type = XML_REGEXP_DECIMAL;
                 break;
-           case 'D': 
+           case 'D':
                 type = XML_REGEXP_NOTDECIMAL;
                 break;
-           case 'w': 
+           case 'w':
                 type = XML_REGEXP_REALCHAR;
                 break;
-           case 'W': 
+           case 'W':
                 type = XML_REGEXP_NOTREALCHAR;
                 break;
         }
@@ -4963,7 +4977,7 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
   * xmlFAParseCharRange:
   * @ctxt:  a regexp parser context
   *
- * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash 
+ * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
   * [18]   seRange   ::=   charOrEsc '-' charOrEsc
   * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
   * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
@@ -5043,11 +5057,12 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
         ERROR("Expecting the end of a char range");
         return;
      }
-    NEXTL(len);
+
      /* TODO check that the values are acceptable character ranges for XML */
      if (end < start) {
         ERROR("End of range is before start of range");
      } else {
+        NEXTL(len);
          xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
                            XML_REGEXP_CHARVAL, start, end, NULL);
      }
@@ -5078,7 +5093,7 @@ xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
   *
   * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
   * [15]   negCharGroup ::= '^' posCharGroup
- * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr  
+ * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
   * [12]   charClassExpr ::= '[' charGroup ']'
   */
  static void
@@ -5326,7 +5341,7 @@ xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
      previous = ctxt->state;
      ret = xmlFAParsePiece(ctxt);
      if (ret != 0) {
-       if (xmlFAGenerateTransitions(ctxt, previous, 
+       if (xmlFAGenerateTransitions(ctxt, previous,
                 (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
             return(-1);
         previous = ctxt->state;
@@ -5335,7 +5350,7 @@ xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
      while ((ret != 0) && (ctxt->error == 0)) {
         ret = xmlFAParsePiece(ctxt);
         if (ret != 0) {
-           if (xmlFAGenerateTransitions(ctxt, previous, 
+           if (xmlFAGenerateTransitions(ctxt, previous,
                     (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
                     return(-1);
             previous = ctxt->state;
@@ -5373,6 +5388,10 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
      end = ctxt->state;
      while ((CUR == '|') && (ctxt->error == 0)) {
         NEXT;
+       if (CUR == 0) {
+           ERROR("expecting a branch after |")
+           return;
+       }
         ctxt->state = start;
         ctxt->end = NULL;
         xmlFAParseBranch(ctxt, end);
@@ -5384,9 +5403,9 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
  }
  
  /************************************************************************
- *                                                                     *
- *                     The basic API                                   *
- *                                                                     *
+ *                                                                     *
+ *                     The basic API                                   *
+ *                                                                     *
   ************************************************************************/
  
  /**
@@ -5573,9 +5592,9 @@ xmlRegFreeRegexp(xmlRegexpPtr regexp) {
  
  #ifdef LIBXML_AUTOMATA_ENABLED
  /************************************************************************
- *                                                                     *
- *                     The Automata interface                          *
- *                                                                     *
+ *                                                                     *
+ *                     The Automata interface                          *
+ *                                                                     *
   ************************************************************************/
  
  /**
@@ -5696,8 +5715,6 @@ xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
      if (atom == NULL)
          return(NULL);
      atom->data = data;
-    if (atom == NULL)
-       return(NULL);
      atom->valuep = xmlStrdup(token);
  
      if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
@@ -5846,7 +5863,7 @@ xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
   *
   * If @to is NULL, this creates first a new target state in the automata
   * and then adds a transition from the @from state to the target state
- * activated by a succession of input of value @token and @token2 and 
+ * activated by a succession of input of value @token and @token2 and
   * whose number is between @min and @max
   *
   * Returns the target state or NULL in case of error
@@ -6000,8 +6017,8 @@ xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
   *
   * If @to is NULL, this creates first a new target state in the automata
   * and then adds a transition from the @from state to the target state
- * activated by a succession of input of value @token and @token2 and whose 
- * number is between @min and @max, moreover that transition can only be 
+ * activated by a succession of input of value @token and @token2 and whose
+ * number is between @min and @max, moreover that transition can only be
   * crossed once.
   *
   * Returns the target state or NULL in case of error
@@ -6043,7 +6060,7 @@ xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
         str[lenn + lenp + 1] = 0;
  
         atom->valuep = str;
-    }    
+    }
      atom->data = data;
      atom->quant = XML_REGEXP_QUANT_ONCEONLY;
      atom->min = min;
@@ -6066,7 +6083,7 @@ xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
      return(to);
  }
  
-    
+
  
  /**
   * xmlAutomataNewOnceTrans:
@@ -6135,7 +6152,7 @@ xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
   */
  xmlAutomataStatePtr
  xmlAutomataNewState(xmlAutomataPtr am) {
-    xmlAutomataStatePtr to; 
+    xmlAutomataStatePtr to;
  
      if (am == NULL)
         return(NULL);
@@ -6202,7 +6219,7 @@ xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
   *
   * Returns the counter number or -1 in case of error
   */
-int            
+int
  xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
      int ret;
  
@@ -6274,7 +6291,7 @@ xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
   *
   * Returns the compiled regexp or NULL in case of error
   */
-xmlRegexpPtr          
+xmlRegexpPtr
  xmlAutomataCompile(xmlAutomataPtr am) {
      xmlRegexpPtr ret;
  
@@ -6294,7 +6311,7 @@ xmlAutomataCompile(xmlAutomataPtr am) {
   *
   * Returns 1 if true, 0 if not, and -1 in case of error
   */
-int          
+int
  xmlAutomataIsDeterminist(xmlAutomataPtr am) {
      int ret;
  
@@ -6334,7 +6351,7 @@ struct _xmlExpCtxt {
  /**
   * xmlExpNewCtxt:
   * @maxNodes:  the maximum number of nodes
- * @dict:  optional dictionnary to use internally
+ * @dict:  optional dictionary to use internally
   *
   * Creates a new context for manipulating expressions
   *
@@ -6347,7 +6364,7 @@ xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
  
      if (maxNodes <= 4096)
          maxNodes = 4096;
-    
+
      ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
      if (ret == NULL)
          return(NULL);
@@ -6401,7 +6418,7 @@ xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
  /* #define DEBUG_DERIV */
  
  /*
- * TODO: 
+ * TODO:
   * - Wildcards
   * - public API for creation
   *
@@ -6469,7 +6486,7 @@ static unsigned short
  xmlExpHashNameComputeKey(const xmlChar *name) {
      unsigned short value = 0L;
      char ch;
-    
+
      if (name != NULL) {
         value += 30 * (*name);
         while ((ch = *name++) != 0) {
@@ -6488,7 +6505,7 @@ xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
                       xmlExpNodePtr right) {
      unsigned long value;
      unsigned short ret;
-    
+
      switch (type) {
          case XML_EXP_SEQ:
             value = left->key;
@@ -6629,7 +6646,7 @@ xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
             left->exp_left->ref++;
             tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
                                      NULL, 0, 0);
-       
+
             xmlExpFree(ctxt, left);
             return(tmp);
         }
@@ -6686,7 +6703,7 @@ xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
             return(right);
         }
         kbase = xmlExpHashComputeKey(type, left, right);
-    } else 
+    } else
          return(NULL);
  
      key = kbase % ctxt->size;
@@ -6827,7 +6844,7 @@ xmlExpRef(xmlExpNodePtr exp) {
   * xmlExpNewAtom:
   * @ctxt: the expression context
   * @name: the atom name
- * @len: the atom name lenght in byte (or -1);
+ * @len: the atom name length in bytes (or -1);
   *
   * Get the atom associated to this name from that context
   *
@@ -6927,7 +6944,7 @@ xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
   ************************************************************************/
  
  static int
-xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                       const xmlChar**list, int len, int nb) {
      int tmp, tmp2;
  tail:
@@ -6964,7 +6981,7 @@ tail:
   * @ctxt: the expression context
   * @exp: the expression
   * @langList: where to store the tokens
- * @len: the allocated lenght of @list
+ * @len: the allocated length of @langList
   *
   * Find all the strings used in @exp and store them in @list
   *
@@ -6972,7 +6989,7 @@ tail:
   *         -2 if there is more than @len strings
   */
  int
-xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                    const xmlChar**langList, int len) {
      if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
          return(-1);
@@ -6980,7 +6997,7 @@ xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
  }
  
  static int
-xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                    const xmlChar**list, int len, int nb) {
      int tmp, tmp2;
  tail:
@@ -7030,7 +7047,7 @@ tail:
   * @ctxt: the expression context
   * @exp: the expression
   * @tokList: where to store the tokens
- * @len: the allocated lenght of @list
+ * @len: the allocated length of @tokList
   *
   * Find all the strings that appears at the start of the languages
   * accepted by @exp and store them in @list. E.g. for (a, b) | c
@@ -7040,7 +7057,7 @@ tail:
   *         -2 if there is more than @len strings
   */
  int
-xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                 const xmlChar**tokList, int len) {
      if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
          return(-1);
@@ -7193,7 +7210,7 @@ xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
          return(NULL);
      }
      /*
-     * check the string is in the dictionnary, if yes use an interned
+     * check the string is in the dictionary, if yes use an interned
       * copy, otherwise we know it's not an acceptable input
       */
      input = xmlDictExists(ctxt->dict, str, len);
@@ -7737,7 +7754,7 @@ xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
      xmlFree((xmlChar **) tab);
      return(ret);
  }
-    
+
  /**
   * xmlExpExpDerive:
   * @ctxt: the expressions context
@@ -7789,7 +7806,7 @@ xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
  int
  xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
      xmlExpNodePtr tmp;
-    
+
      if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
          return(-1);
  
@@ -7833,7 +7850,7 @@ xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
  
  /************************************************************************
   *                                                                     *
- *                     Parsing expression                              *
+ *                     Parsing expression                              *
   *                                                                     *
   ************************************************************************/
  
@@ -7937,7 +7954,7 @@ parse_quantifier:
         ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
                                  0, -1);
         SKIP_BLANKS
-    } 
+    }
      return(ret);
  }
  
@@ -8059,7 +8076,7 @@ xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
              break;
          case XML_EXP_COUNT: {
             char rep[40];
-           
+
             c = expr->exp_left;
             if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
                 xmlExpDumpInt(buf, c, 1);