Big fixes of pattern compilations
authorNick Wellnhofer <wellnhofer@aevum.de>
Thu, 17 Sep 2009 13:23:46 +0000 (15:23 +0200)
committerDaniel Veillard <veillard@redhat.com>
Thu, 17 Sep 2009 13:23:46 +0000 (15:23 +0200)
The problem is that "@node()", "attribute::node()", "child::node()" and
"node()" are all handled in different code paths and only the latter
works.

Then, I noticed that the handling of match="child::name" is wrong. It
matches the parents of <name> elements although it should be treated
exactly like match="name". So the whole XSLT_OP_CHILD stuff is
unneeded.

I also found that xsltScanName behaves a bit strange with regard to
':' characters. It doesn't parse an XML Name like the documentation
says. It's better to use xsltScanNCName instead.

Another minor issue is that the parser currently allows invalid
expressions like match="element*" because of lines 1745-1747 in
pattern.c in trunk.

* libxslt/pattern.c: fix all those problems
* tests/REC/Makefile.am tests/REC/test-5.2-19* tests/REC/test-5.2-20*
  tests/REC/test-5.2-21*: add test cases to the regression suite

libxslt/pattern.c
tests/REC/Makefile.am
tests/REC/test-5.2-19.out [new file with mode: 0644]
tests/REC/test-5.2-19.xml [new file with mode: 0644]
tests/REC/test-5.2-19.xsl [new file with mode: 0644]
tests/REC/test-5.2-20.out [new file with mode: 0644]
tests/REC/test-5.2-20.xml [new file with mode: 0644]
tests/REC/test-5.2-20.xsl [new file with mode: 0644]
tests/REC/test-5.2-21.out [new file with mode: 0644]
tests/REC/test-5.2-21.xml [new file with mode: 0644]
tests/REC/test-5.2-21.xsl [new file with mode: 0644]

index c435367..8ce74e3 100644 (file)
@@ -46,7 +46,6 @@ typedef enum {
     XSLT_OP_END=0,
     XSLT_OP_ROOT,
     XSLT_OP_ELEM,
-    XSLT_OP_CHILD,
     XSLT_OP_ATTR,
     XSLT_OP_PARENT,
     XSLT_OP_ANCESTOR,
@@ -61,6 +60,11 @@ typedef enum {
     XSLT_OP_PREDICATE
 } xsltOp;
 
+typedef enum {
+    AXIS_CHILD=1,
+    AXIS_ATTRIBUTE
+} xsltAxis;
+
 typedef struct _xsltStepState xsltStepState;
 typedef xsltStepState *xsltStepStatePtr;
 struct _xsltStepState {
@@ -697,32 +701,6 @@ restart:
                        goto rollback;
                }
                continue;
-            case XSLT_OP_CHILD: {
-               xmlNodePtr lst;
-
-               if ((node->type != XML_ELEMENT_NODE) &&
-                   (node->type != XML_DOCUMENT_NODE) &&
-#ifdef LIBXML_DOCB_ENABLED
-                   (node->type != XML_DOCB_DOCUMENT_NODE) &&
-#endif
-                   (node->type != XML_HTML_DOCUMENT_NODE))
-                   goto rollback;
-
-               lst = node->children;
-
-               if (step->value != NULL) {
-                   while (lst != NULL) {
-                       if ((lst->type == XML_ELEMENT_NODE) &&
-                           (step->value[0] == lst->name[0]) &&
-                           (xmlStrEqual(step->value, lst->name)))
-                           break;
-                       lst = lst->next;
-                   }
-                   if (lst != NULL)
-                       continue;
-               }
-               goto rollback;
-           }
             case XSLT_OP_ATTR:
                if (node->type != XML_ATTRIBUTE_NODE)
                    goto rollback;
@@ -1330,46 +1308,6 @@ xsltScanLiteral(xsltParserContextPtr ctxt) {
 }
 
 /**
- * xsltScanName:
- * @ctxt:  the XPath Parser context
- *
- * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | 
- *                  CombiningChar | Extender
- *
- * [5] Name ::= (Letter | '_' | ':') (NameChar)*
- *
- * [6] Names ::= Name (S Name)*
- *
- * Returns the Name parsed or NULL
- */
-
-static xmlChar *
-xsltScanName(xsltParserContextPtr ctxt) {
-    const xmlChar *q, *cur;
-    xmlChar *ret = NULL;
-    int val, len;
-
-    SKIP_BLANKS;
-
-    cur = q = CUR_PTR;
-    val = xmlStringCurrentChar(NULL, cur, &len);
-    if (!IS_LETTER(val) && (val != '_') && (val != ':'))
-       return(NULL);
-
-    while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
-           (val == '.') || (val == '-') ||
-          (val == '_') || 
-          (IS_COMBINING(val)) ||
-          (IS_EXTENDER(val))) {
-       cur += len;
-       val = xmlStringCurrentChar(NULL, cur, &len);
-    }
-    ret = xmlStrndup(q, cur - q);
-    CUR_PTR = cur;
-    return(ret);
-}
-
-/**
  * xsltScanNCName:
  * @ctxt:  the XPath Parser context
  *
@@ -1404,30 +1342,6 @@ xsltScanNCName(xsltParserContextPtr ctxt) {
     return(ret);
 }
 
-/**
- * xsltScanQName:
- * @ctxt:  the XPath Parser context
- * @prefix:  the place to store the prefix
- *
- * Parse a qualified name
- *
- * Returns the Name parsed or NULL
- */
-
-static xmlChar *
-xsltScanQName(xsltParserContextPtr ctxt, xmlChar **prefix) {
-    xmlChar *ret = NULL;
-
-    *prefix = NULL;
-    ret = xsltScanNCName(ctxt);
-    if (ret && CUR == ':') {
-        *prefix = ret;
-       NEXT;
-       ret = xsltScanNCName(ctxt);
-    }
-    return(ret);
-}
-
 /*
  * xsltCompileIdKeyPattern:
  * @ctxt:  the compilation context
@@ -1447,7 +1361,7 @@ xsltScanQName(xsltParserContextPtr ctxt, xmlChar **prefix) {
  */
 static void
 xsltCompileIdKeyPattern(xsltParserContextPtr ctxt, xmlChar *name,
-               int aid, int novar) {
+               int aid, int novar, xsltAxis axis) {
     xmlChar *lit = NULL;
     xmlChar *lit2 = NULL;
 
@@ -1458,6 +1372,12 @@ xsltCompileIdKeyPattern(xsltParserContextPtr ctxt, xmlChar *name,
        return;
     }
     if ((aid) && (xmlStrEqual(name, (const xmlChar *)"id"))) {
+       if (axis != 0) {
+           xsltTransformError(NULL, NULL, NULL,
+                   "xsltCompileIdKeyPattern : NodeTest expected\n");
+           ctxt->error = 1;
+           return;
+       }
        NEXT;
        SKIP_BLANKS;
         lit = xsltScanLiteral(ctxt);
@@ -1473,6 +1393,12 @@ xsltCompileIdKeyPattern(xsltParserContextPtr ctxt, xmlChar *name,
        NEXT;
        PUSH(XSLT_OP_ID, lit, NULL, novar);
     } else if ((aid) && (xmlStrEqual(name, (const xmlChar *)"key"))) {
+       if (axis != 0) {
+           xsltTransformError(NULL, NULL, NULL,
+                   "xsltCompileIdKeyPattern : NodeTest expected\n");
+           ctxt->error = 1;
+           return;
+       }
        NEXT;
        SKIP_BLANKS;
         lit = xsltScanLiteral(ctxt);
@@ -1549,7 +1475,12 @@ xsltCompileIdKeyPattern(xsltParserContextPtr ctxt, xmlChar *name,
            return;
        }
        NEXT;
-       PUSH(XSLT_OP_NODE, NULL, NULL, novar);
+       if (axis == AXIS_ATTRIBUTE) {
+           PUSH(XSLT_OP_ATTR, NULL, NULL, novar);
+       }
+       else {
+           PUSH(XSLT_OP_NODE, NULL, NULL, novar);
+       }
     } else if (aid) {
        xsltTransformError(NULL, NULL, NULL,
            "xsltCompileIdKeyPattern : expecting 'key' or 'id' or node type\n");
@@ -1594,51 +1525,25 @@ xsltCompileStepPattern(xsltParserContextPtr ctxt, xmlChar *token, int novar) {
     const xmlChar *URI = NULL;
     xmlChar *URL = NULL;
     int level;
+    xsltAxis axis = 0;
 
     SKIP_BLANKS;
     if ((token == NULL) && (CUR == '@')) {
-       xmlChar *prefix = NULL;
-
        NEXT;
-       if (CUR == '*') {
-           NEXT;
-           PUSH(XSLT_OP_ATTR, NULL, NULL, novar);
-           goto parse_predicate;
-       }
-       token = xsltScanQName(ctxt, &prefix);
-       if (prefix != NULL) {
-           xmlNsPtr ns;
-
-           ns = xmlSearchNs(ctxt->doc, ctxt->elem, prefix);
-           if (ns == NULL) {
-               xsltTransformError(NULL, NULL, NULL,
-               "xsltCompileStepPattern : no namespace bound to prefix %s\n",
-                                prefix);
-           } else {
-               URL = xmlStrdup(ns->href);
-           }
-           xmlFree(prefix);
-       }
-       if (token == NULL) {
-           if (CUR == '*') {
-               NEXT;
-               PUSH(XSLT_OP_ATTR, NULL, URL, novar);
-               return;
-           }
-           xsltTransformError(NULL, NULL, NULL,
-                   "xsltCompileStepPattern : Name expected\n");
-           ctxt->error = 1;
-           goto error;
-       }
-       PUSH(XSLT_OP_ATTR, token, URL, novar);
-       goto parse_predicate;
+        axis = AXIS_ATTRIBUTE;
     }
+parse_node_test:
     if (token == NULL)
-       token = xsltScanName(ctxt);
+       token = xsltScanNCName(ctxt);
     if (token == NULL) {
        if (CUR == '*') {
            NEXT;
-           PUSH(XSLT_OP_ALL, token, NULL, novar);
+           if (axis == AXIS_ATTRIBUTE) {
+                PUSH(XSLT_OP_ATTR, NULL, NULL, novar);
+            }
+            else {
+                PUSH(XSLT_OP_ALL, NULL, NULL, novar);
+            }
            goto parse_predicate;
        } else {
            xsltTransformError(NULL, NULL, NULL,
@@ -1651,7 +1556,7 @@ xsltCompileStepPattern(xsltParserContextPtr ctxt, xmlChar *token, int novar) {
 
     SKIP_BLANKS;
     if (CUR == '(') {
-       xsltCompileIdKeyPattern(ctxt, token, 0, novar);
+       xsltCompileIdKeyPattern(ctxt, token, 0, novar, axis);
        if (ctxt->error)
            goto error;
     } else if (CUR == ':') {
@@ -1663,7 +1568,7 @@ xsltCompileStepPattern(xsltParserContextPtr ctxt, xmlChar *token, int novar) {
            /*
             * This is a namespace match
             */
-           token = xsltScanName(ctxt);
+           token = xsltScanNCName(ctxt);
            ns = xmlSearchNs(ctxt->doc, ctxt->elem, prefix);
            if (ns == NULL) {
                xsltTransformError(NULL, NULL, NULL,
@@ -1679,7 +1584,12 @@ xsltCompileStepPattern(xsltParserContextPtr ctxt, xmlChar *token, int novar) {
            if (token == NULL) {
                if (CUR == '*') {
                    NEXT;
-                   PUSH(XSLT_OP_NS, URL, NULL, novar);
+                    if (axis == AXIS_ATTRIBUTE) {
+                        PUSH(XSLT_OP_ATTR, NULL, URL, novar);
+                    }
+                    else {
+                        PUSH(XSLT_OP_NS, URL, NULL, novar);
+                    }
                } else {
                    xsltTransformError(NULL, NULL, NULL,
                            "xsltCompileStepPattern : Name expected\n");
@@ -1687,54 +1597,25 @@ xsltCompileStepPattern(xsltParserContextPtr ctxt, xmlChar *token, int novar) {
                    goto error;
                }
            } else {
-               PUSH(XSLT_OP_ELEM, token, URL, novar);
+                if (axis == AXIS_ATTRIBUTE) {
+                    PUSH(XSLT_OP_ATTR, token, URL, novar);
+                }
+                else {
+                    PUSH(XSLT_OP_ELEM, token, URL, novar);
+                }
            }
        } else {
+           if (axis != 0) {
+               xsltTransformError(NULL, NULL, NULL,
+                   "xsltCompileStepPattern : NodeTest expected\n");
+               ctxt->error = 1;
+               goto error;
+           }
            NEXT;
            if (xmlStrEqual(token, (const xmlChar *) "child")) {
-               xmlFree(token);
-               token = xsltScanName(ctxt);
-               if (token == NULL) {
-                   if (CUR == '*') {
-                       NEXT;
-                       PUSH(XSLT_OP_ALL, token, NULL, novar);
-                       goto parse_predicate;
-                   } else {
-                       xsltTransformError(NULL, NULL, NULL,
-                           "xsltCompileStepPattern : QName expected\n");
-                       ctxt->error = 1;
-                       goto error;
-                   }
-               }
-               URI = xsltGetQNameURI(ctxt->elem, &token);
-               if (token == NULL) {
-                   ctxt->error = 1;
-                   goto error;
-               } else {
-                   name = xmlStrdup(token);
-                   if (URI != NULL)
-                       URL = xmlStrdup(URI);
-               }
-               PUSH(XSLT_OP_CHILD, name, URL, novar);
+               axis = AXIS_CHILD;
            } else if (xmlStrEqual(token, (const xmlChar *) "attribute")) {
-               xmlFree(token);
-               token = xsltScanName(ctxt);
-               if (token == NULL) {
-                   xsltTransformError(NULL, NULL, NULL,
-                           "xsltCompileStepPattern : QName expected\n");
-                   ctxt->error = 1;
-                   goto error;
-               }
-               URI = xsltGetQNameURI(ctxt->elem, &token);
-               if (token == NULL) {
-                   ctxt->error = 1;
-                   goto error;
-               } else {
-                   name = xmlStrdup(token);
-                   if (URI != NULL)
-                       URL = xmlStrdup(URI);
-               }
-               PUSH(XSLT_OP_ATTR, name, URL, novar);
+               axis = AXIS_ATTRIBUTE;
            } else {
                xsltTransformError(NULL, NULL, NULL,
                    "xsltCompileStepPattern : 'child' or 'attribute' expected\n");
@@ -1742,10 +1623,10 @@ xsltCompileStepPattern(xsltParserContextPtr ctxt, xmlChar *token, int novar) {
                goto error;
            }
            xmlFree(token);
+            SKIP_BLANKS;
+            token = xsltScanNCName(ctxt);
+           goto parse_node_test;
        }
-    } else if (CUR == '*') {
-       NEXT;
-       PUSH(XSLT_OP_ALL, token, NULL, novar);
     } else {
        URI = xsltGetQNameURI(ctxt->elem, &token);
        if (token == NULL) {
@@ -1754,7 +1635,12 @@ xsltCompileStepPattern(xsltParserContextPtr ctxt, xmlChar *token, int novar) {
        }
        if (URI != NULL)
            URL = xmlStrdup(URI);
-       PUSH(XSLT_OP_ELEM, token, URL, novar);
+        if (axis == AXIS_ATTRIBUTE) {
+            PUSH(XSLT_OP_ATTR, token, URL, novar);
+        }
+        else {
+            PUSH(XSLT_OP_ELEM, token, URL, novar);
+        }
     }
 parse_predicate:
     SKIP_BLANKS;
@@ -1891,7 +1777,7 @@ xsltCompileLocationPathPattern(xsltParserContextPtr ctxt, int novar) {
        xsltCompileRelativePathPattern(ctxt, NULL, novar);
     } else {
        xmlChar *name;
-       name = xsltScanName(ctxt);
+       name = xsltScanNCName(ctxt);
        if (name == NULL) {
            xsltTransformError(NULL, NULL, NULL,
                    "xsltCompileLocationPathPattern : Name expected\n");
@@ -1900,7 +1786,7 @@ xsltCompileLocationPathPattern(xsltParserContextPtr ctxt, int novar) {
        }
        SKIP_BLANKS;
        if ((CUR == '(') && !xmlXPathIsNodeType(name)) {
-           xsltCompileIdKeyPattern(ctxt, name, 1, novar);
+           xsltCompileIdKeyPattern(ctxt, name, 1, novar, 0);
            if ((CUR == '/') && (NXT(1) == '/')) {
                PUSH(XSLT_OP_ANCESTOR, NULL, NULL, novar);
                NEXT;
@@ -2178,7 +2064,6 @@ xsltAddTemplate(xsltStylesheetPtr style, xsltTemplatePtr cur,
            else
                top = &(style->attrMatch);
            break;
-        case XSLT_OP_CHILD:
         case XSLT_OP_PARENT:
         case XSLT_OP_ANCESTOR:
            top = &(style->elemMatch);
index cae04f4..9e228b5 100644 (file)
@@ -42,6 +42,9 @@ EXTRA_DIST =                                          \
     test-5.2-16.out test-5.2-16.xml test-5.2-16.xsl    \
     test-5.2-17.out test-5.2-17.xml test-5.2-17.xsl    \
     test-5.2-18.out test-5.2-18.xml test-5.2-18.xsl    \
+    test-5.2-19.out test-5.2-19.xml test-5.2-19.xsl    \
+    test-5.2-20.out test-5.2-20.xml test-5.2-20.xsl    \
+    test-5.2-21.out test-5.2-21.xml test-5.2-21.xsl    \
     test-5.3.out test-5.3.xml test-5.3.xsl             \
     test-5.4-1.out test-5.4-1.xml test-5.4-1.xsl       \
     test-5.4-2.out test-5.4-2.xml test-5.4-2.xsl       \
diff --git a/tests/REC/test-5.2-19.out b/tests/REC/test-5.2-19.out
new file mode 100644 (file)
index 0000000..eab8e77
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0"?>
+<result>element</result>
diff --git a/tests/REC/test-5.2-19.xml b/tests/REC/test-5.2-19.xml
new file mode 100644 (file)
index 0000000..5694448
--- /dev/null
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<root>
+    <element/>
+</root>
diff --git a/tests/REC/test-5.2-19.xsl b/tests/REC/test-5.2-19.xsl
new file mode 100644 (file)
index 0000000..d3998e6
--- /dev/null
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+<xsl:template match="text()"/>
+
+<xsl:template match="child::element">
+    <result>
+        <xsl:value-of select="name()"/>
+    </result>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tests/REC/test-5.2-20.out b/tests/REC/test-5.2-20.out
new file mode 100644 (file)
index 0000000..67a2a01
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0"?>
+<result>value</result>
diff --git a/tests/REC/test-5.2-20.xml b/tests/REC/test-5.2-20.xml
new file mode 100644 (file)
index 0000000..ca946bc
--- /dev/null
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<root>
+    <element attribute="value"/>
+</root>
diff --git a/tests/REC/test-5.2-20.xsl b/tests/REC/test-5.2-20.xsl
new file mode 100644 (file)
index 0000000..6590f66
--- /dev/null
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+<xsl:template match="*">
+    <xsl:apply-templates select="*|@*"/>
+</xsl:template>
+
+<xsl:template match="@node()">
+    <result>
+        <xsl:value-of select="."/>
+    </result>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tests/REC/test-5.2-21.out b/tests/REC/test-5.2-21.out
new file mode 100644 (file)
index 0000000..67a2a01
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0"?>
+<result>value</result>
diff --git a/tests/REC/test-5.2-21.xml b/tests/REC/test-5.2-21.xml
new file mode 100644 (file)
index 0000000..ca946bc
--- /dev/null
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<root>
+    <element attribute="value"/>
+</root>
diff --git a/tests/REC/test-5.2-21.xsl b/tests/REC/test-5.2-21.xsl
new file mode 100644 (file)
index 0000000..15a80fb
--- /dev/null
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+<xsl:template match="*">
+    <xsl:apply-templates select="*|@*"/>
+</xsl:template>
+
+<xsl:template match="attribute::node()">
+    <result>
+        <xsl:value-of select="."/>
+    </result>
+</xsl:template>
+
+</xsl:stylesheet>