From: Daniel Veillard Date: Mon, 5 Feb 2001 18:29:06 +0000 (+0000) Subject: Starting doing some bulk testing and transformations: X-Git-Tag: v1.1.28~1483 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d3faa0e17acd252ffaf56e94b849536e65a6e354;p=platform%2Fupstream%2Flibxslt.git Starting doing some bulk testing and transformations: - FEATURES: updated, added mode support for templates - pattern.[ch] transform.c xslt.c xsltInternals.h: added mode support for templates - templates.c variables.c: simple fixes - xslt.c: added a separate DEBUG_BLANKS debug class disabled - xsltproc.c: added option -timing - xsltutils.c: seem I forgot to add encoding support in output... - configure.in tests/Makefile.am tests/xmlspec/*: added a test consisting of reformatting the XML REC with the xmlspec XSLT, heavy ! Daniel --- diff --git a/ChangeLog b/ChangeLog index adbec63..82be479 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +Mon Feb 5 18:58:17 CET 2001 Daniel Veillard + + * FEATURES: updated, added mode support for templates + * pattern.[ch] transform.c xslt.c xsltInternals.h: added mode + support for templates + * templates.c variables.c: simple fixes + * xslt.c: added a separate DEBUG_BLANKS debug class disabled + * xsltproc.c: added option -timing + * xsltutils.c: seem I forgot to add encoding support in output... + * configure.in tests/Makefile.am tests/xmlspec/*: added a test + consisting of reformatting the XML REC with the xmlspec XSLT, + heavy ! 
+ Mon Feb 5 18:43:37 CET 2001 Bjorn Reese * FEATURES: updated diff --git a/FEATURES b/FEATURES index d14ca3e..35551ad 100644 --- a/FEATURES +++ b/FEATURES @@ -43,7 +43,7 @@ YES xsl:template YES match = pattern YES name = qname YES priority = number -NO mode = qname +YES mode = qname YES xsl:namespace-alias YES stylesheet-prefix = prefix | "#default" @@ -85,7 +85,7 @@ Instructions: YES xsl:apply-templates YES select = node-set-expression -NO mode = qname +YES mode = qname NO xsl:apply-imports diff --git a/configure.in b/configure.in index 6ad93ac..3e5122f 100644 --- a/configure.in +++ b/configure.in @@ -150,6 +150,7 @@ tests/REC2/Makefile tests/REC/Makefile tests/general/Makefile tests/numbers/Makefile +tests/xmlspec/Makefile doc/Makefile xslt-config libxslt.spec diff --git a/libxslt/pattern.c b/libxslt/pattern.c index f4203f7..0bf79be 100644 --- a/libxslt/pattern.c +++ b/libxslt/pattern.c @@ -24,6 +24,7 @@ #include "xsltutils.h" #include "imports.h" #include "templates.h" +#include "pattern.h" /* #define DEBUG_PARSING */ @@ -63,7 +64,9 @@ typedef struct _xsltCompMatch xsltCompMatch; typedef xsltCompMatch *xsltCompMatchPtr; struct _xsltCompMatch { struct _xsltCompMatch *next; /* siblings in the name hash */ - float priority; /* the priority */ + float priority; /* the priority */ + const xmlChar *mode; /* the mode */ + const xmlChar *modeURI; /* the mode URI */ xsltTemplatePtr template; /* the associated template */ /* TODO fix the statically allocated size steps[] */ @@ -89,13 +92,15 @@ struct _xsltParserContext { /** * xsltNewCompMatch: + * @mode: the mode name or NULL + * @modeURI: the mode URI or NULL * * Create a new XSLT CompMatch * * Returns the newly allocated xsltCompMatchPtr or NULL in case of error */ xsltCompMatchPtr -xsltNewCompMatch(void) { +xsltNewCompMatch(const xmlChar *mode, const xmlChar *modeURI) { xsltCompMatchPtr cur; cur = (xsltCompMatchPtr) xmlMalloc(sizeof(xsltCompMatch)); @@ -106,6 +111,8 @@ xsltNewCompMatch(void) { } memset(cur, 0, 
sizeof(xsltCompMatch)); cur->maxStep = 20; + cur->mode = xmlStrdup(mode); + cur->modeURI = xmlStrdup(modeURI); return(cur); } @@ -122,6 +129,10 @@ xsltFreeCompMatch(xsltCompMatchPtr comp) { if (comp == NULL) return; + if (comp->mode != NULL) + xmlFree((xmlChar *)comp->mode); + if (comp->modeURI != NULL) + xmlFree((xmlChar *)comp->modeURI); for (i = 0;i < comp->nbStep;i++) { op = &comp->steps[i]; if (op->value != NULL) @@ -278,6 +289,8 @@ xsltReverseCompMatch(xsltCompMatchPtr comp) { * @ctxt: a XSLT process context * @comp: the precompiled pattern * @node: a node + * @mode: the mode name or NULL + * @modeURI: the mode URI or NULL * * Test wether the node matches the pattern * @@ -285,7 +298,8 @@ xsltReverseCompMatch(xsltCompMatchPtr comp) { */ int xsltTestCompMatch(xsltTransformContextPtr ctxt, xsltCompMatchPtr comp, - xmlNodePtr node) { + xmlNodePtr node, const xmlChar *mode, + const xmlChar *modeURI) { int i; xsltStepOpPtr step, select = NULL; @@ -294,6 +308,25 @@ xsltTestCompMatch(xsltTransformContextPtr ctxt, xsltCompMatchPtr comp, "xsltTestCompMatch: null arg\n"); return(-1); } + if (mode != NULL) { + if (comp->mode == NULL) + return(0); + if ((comp->mode != mode) && (!xmlStrEqual(comp->mode, mode))) + return(0); + } else { + if (comp->mode != NULL) + return(0); + } + if (modeURI != NULL) { + if (comp->modeURI == NULL) + return(0); + if ((comp->modeURI != modeURI) && + (!xmlStrEqual(comp->modeURI, modeURI))) + return(0); + } else { + if (comp->modeURI != NULL) + return(0); + } for (i = 0;i < comp->nbStep;i++) { step = &comp->steps[i]; if (step->op != XSLT_OP_PREDICATE) @@ -442,6 +475,8 @@ xsltTestCompMatch(xsltTransformContextPtr ctxt, xsltCompMatchPtr comp, * Depending on the last selection, one may need to * recompute contextSize and proximityPosition. 
*/ + oldCS = ctxt->xpathCtxt->contextSize; + oldCP = ctxt->xpathCtxt->proximityPosition; if ((select != NULL) && (select->op == XSLT_OP_ELEM) && (select->value != NULL) && @@ -451,8 +486,6 @@ xsltTestCompMatch(xsltTransformContextPtr ctxt, xsltCompMatchPtr comp, /* TODO: cache those informations ?!? */ xmlNodePtr siblings = node->parent->children; - oldCS = ctxt->xpathCtxt->contextSize; - oldCP = ctxt->xpathCtxt->proximityPosition; while (siblings != NULL) { if (siblings->type == XML_ELEMENT_NODE) { if (siblings == node) { @@ -1046,6 +1079,8 @@ error: /** * xsltCompilePattern: * @pattern an XSLT pattern + * @mode: the mode name or NULL + * @modeURI: the mode URI or NULL * * Compile the XSLT pattern and generates a precompiled form suitable * for fast matching. @@ -1058,7 +1093,8 @@ error: */ xsltCompMatchPtr -xsltCompilePattern(const xmlChar *pattern) { +xsltCompilePattern(const xmlChar *pattern, const xmlChar *mode, + const xmlChar *modeURI) { xsltParserContextPtr ctxt; xsltCompMatchPtr ret; const xmlChar *cur; @@ -1084,7 +1120,7 @@ xsltCompilePattern(const xmlChar *pattern) { ctxt = xsltNewParserContext(); if (ctxt == NULL) return(NULL); - ret = xsltNewCompMatch(); + ret = xsltNewCompMatch(mode, modeURI); if (ret == NULL) { xsltFreeParserContext(ctxt); return(NULL); @@ -1149,14 +1185,17 @@ error: * xsltAddTemplate: * @style: an XSLT stylesheet * @cur: an XSLT template + * @mode: the mode name or NULL + * @modeURI: the mode URI or NULL * * Register the XSLT pattern associated to @cur * * Returns -1 in case of error, 0 otherwise */ int -xsltAddTemplate(xsltStylesheetPtr style, xsltTemplatePtr cur) { - xsltCompMatchPtr pat, list, *top; +xsltAddTemplate(xsltStylesheetPtr style, xsltTemplatePtr cur, + const xmlChar *mode, const xmlChar *modeURI) { + xsltCompMatchPtr pat, list, *top = NULL; const xmlChar *name = NULL; xmlChar *p, *pattern, tmp; @@ -1181,7 +1220,7 @@ next_pattern: tmp = *p; *p = 0; - pat = xsltCompilePattern(pattern); + pat = xsltCompilePattern(pattern, 
mode, modeURI); *p = tmp; if (tmp != 0) p++; @@ -1249,7 +1288,7 @@ next_pattern: } if (name != NULL) { if (style->templatesHash == NULL) { - style->templatesHash = xmlHashCreate(0); + style->templatesHash = xmlHashCreate(1024); if (style->templatesHash == NULL) { xsltFreeCompMatch(pat); return(-1); @@ -1258,15 +1297,17 @@ next_pattern: xsltGenericDebug(xsltGenericDebugContext, "xsltAddTemplate: created template hash\n"); #endif - xmlHashAddEntry(style->templatesHash, name, pat); + xmlHashAddEntry3(style->templatesHash, name, mode, modeURI, pat); #ifdef DEBUG_PARSING xsltGenericDebug(xsltGenericDebugContext, "xsltAddTemplate: added new hash %s\n", name); #endif } else { - list = (xsltCompMatchPtr) xmlHashLookup(style->templatesHash, name); + list = (xsltCompMatchPtr) xmlHashLookup3(style->templatesHash, + name, mode, modeURI); if (list == NULL) { - xmlHashAddEntry(style->templatesHash, name, pat); + xmlHashAddEntry3(style->templatesHash, name, + mode, modeURI, pat); #ifdef DEBUG_PARSING xsltGenericDebug(xsltGenericDebugContext, "xsltAddTemplate: added new hash %s\n", name); @@ -1279,7 +1320,8 @@ next_pattern: */ if (list->priority <= pat->priority) { pat->next = list; - xmlHashUpdateEntry(style->templatesHash, name, pat, NULL); + xmlHashUpdateEntry3(style->templatesHash, name, + mode, modeURI, pat, NULL); #ifdef DEBUG_PARSING xsltGenericDebug(xsltGenericDebugContext, "xsltAddTemplate: added head hash for %s\n", name); @@ -1326,7 +1368,7 @@ next_pattern: /** * xsltGetTemplate: * @ctxt: a XSLT process context - * @node: an XML Node + * @mode: the mode name or NULL * * Finds the template applying to this node * @@ -1382,10 +1424,12 @@ xsltGetTemplate(xsltTransformContextPtr ctxt, xmlNodePtr node) { /* * find the list of appliable expressions based on the name */ - list = (xsltCompMatchPtr) xmlHashLookup(style->templatesHash, name); + list = (xsltCompMatchPtr) xmlHashLookup3(style->templatesHash, + name, ctxt->mode, ctxt->modeURI); } while (list != NULL) { - if 
(xsltTestCompMatch(ctxt, list, node)) { + if (xsltTestCompMatch(ctxt, list, node, + ctxt->mode, ctxt->modeURI)) { ret = list->template; break; } @@ -1436,7 +1480,8 @@ xsltGetTemplate(xsltTransformContextPtr ctxt, xmlNodePtr node) { } while ((list != NULL) && ((ret == NULL) || (list->priority > ret->priority))) { - if (xsltTestCompMatch(ctxt, list, node)) { + if (xsltTestCompMatch(ctxt, list, node, + ctxt->mode, ctxt->modeURI)) { ret = list->template; break; } diff --git a/libxslt/pattern.h b/libxslt/pattern.h index 30f9daf..946ff30 100644 --- a/libxslt/pattern.h +++ b/libxslt/pattern.h @@ -16,7 +16,9 @@ extern "C" { #endif int xsltAddTemplate (xsltStylesheetPtr style, - xsltTemplatePtr cur); + xsltTemplatePtr cur, + const xmlChar *mode, + const xmlChar *modeURI); xsltTemplatePtr xsltGetTemplate (xsltTransformContextPtr ctxt, xmlNodePtr node); void xsltFreeTemplateHashes (xsltStylesheetPtr style); diff --git a/libxslt/templates.c b/libxslt/templates.c index e91bb10..9bc2095 100644 --- a/libxslt/templates.c +++ b/libxslt/templates.c @@ -55,7 +55,7 @@ xsltEvalXPathPredicate(xsltTransformContextPtr ctxt, const xmlChar *expr) { xpathParserCtxt = xmlXPathNewParserContext(expr, ctxt->xpathCtxt); if (xpathParserCtxt == NULL) - return(NULL); + return(0); ctxt->xpathCtxt->node = ctxt->node; xmlXPathEvalExpr(xpathParserCtxt); res = valuePop(xpathParserCtxt); diff --git a/libxslt/transform.c b/libxslt/transform.c index 73374d9..35ac406 100644 --- a/libxslt/transform.c +++ b/libxslt/transform.c @@ -646,7 +646,6 @@ xsltCopyOf(xsltTransformContextPtr ctxt, xmlNodePtr node, if (xpathParserCtxt == NULL) goto error; ctxt->xpathCtxt->node = node; - valuePush(xpathParserCtxt, xmlXPathNewNodeSet(node)); xmlXPathEvalExpr(xpathParserCtxt); res = valuePop(xpathParserCtxt); do { @@ -767,7 +766,6 @@ xsltValueOf(xsltTransformContextPtr ctxt, xmlNodePtr node, if (xpathParserCtxt == NULL) goto error; ctxt->xpathCtxt->node = node; - valuePush(xpathParserCtxt, xmlXPathNewNodeSet(node)); 
xmlXPathEvalExpr(xpathParserCtxt); xmlXPathStringFunction(xpathParserCtxt, 1); res = valuePop(xpathParserCtxt); @@ -906,7 +904,7 @@ xsltNumber(xsltTransformContextPtr ctxt, prop = xmlGetNsProp(cur, (const xmlChar *)"grouping-size", XSLT_NAMESPACE); if (prop != NULL) { - sscanf(prop, "%d", &numdata.digitsPerGroup); + sscanf((char *)prop, "%d", &numdata.digitsPerGroup); xmlFree(prop); } else { numdata.groupingCharacter = 0; @@ -969,7 +967,7 @@ xsltDefaultProcessOneNode(xsltTransformContextPtr ctxt, xmlNodePtr node) { ctxt->node = node; xsltApplyOneTemplate(ctxt, node, template->content); ctxt->node = oldNode; - } else { + } else if (ctxt->mode == NULL) { copy = xmlCopyNode(node, 0); if (copy != NULL) { xmlAddChild(ctxt->insert, copy); @@ -990,7 +988,7 @@ xsltDefaultProcessOneNode(xsltTransformContextPtr ctxt, xmlNodePtr node) { ctxt->node = node; xsltApplyOneTemplate(ctxt, node, template->content); ctxt->node = oldNode; - } else { + } else if (ctxt->mode == NULL) { if (attr->ns != NULL) { if ((!xmlStrEqual(attr->ns->href, XSLT_NAMESPACE)) && (xmlStrncasecmp(attr->ns->prefix, @@ -1105,7 +1103,7 @@ xsltDefaultProcessOneNode(xsltTransformContextPtr ctxt, xmlNodePtr node) { ctxt->xpathCtxt->proximityPosition = childno; xsltApplyOneTemplate(ctxt, cur, template->content); ctxt->node = oldNode; - } else { + } else if (ctxt->mode == NULL) { copy = xmlCopyNode(cur, 0); if (copy != NULL) { xmlAddChild(ctxt->insert, copy); @@ -1240,6 +1238,8 @@ xsltApplyTemplates(xsltTransformContextPtr ctxt, xmlNodePtr node, xmlNodeSetPtr list = NULL, oldlist; xmlXPathParserContextPtr xpathParserCtxt = NULL; int i, oldProximityPosition, oldContextSize; + xmlChar *mode, *modeURI; + const xmlChar *oldmode, *oldmodeURI; if ((ctxt == NULL) || (node == NULL) || (inst == NULL)) return; @@ -1248,6 +1248,52 @@ xsltApplyTemplates(xsltTransformContextPtr ctxt, xmlNodePtr node, xsltGenericDebug(xsltGenericDebugContext, "xsltApplyTemplates: node: %s\n", node->name); #endif + + /* + * Get mode if any + */ 
+ oldmode = ctxt->mode; + oldmodeURI = ctxt->modeURI; + prop = xmlGetNsProp(inst, (const xmlChar *)"mode", XSLT_NAMESPACE); + if (prop != NULL) { + xmlChar *prefix = NULL; + + mode = xmlSplitQName2(prop, &prefix); + if (mode != NULL) { + if (prefix != NULL) { + xmlNsPtr ns; + + ns = xmlSearchNs(inst->doc, inst, prefix); + if (ns == NULL) { + xsltGenericError(xsltGenericErrorContext, + "no namespace bound to prefix %s\n", prefix); + xmlFree(prefix); + xmlFree(mode); + mode = prop; + } else { + modeURI = xmlStrdup(ns->href); + xmlFree(prefix); + xmlFree(prop); + } + } else { + xmlFree(prop); + modeURI = NULL; + } + } else { + mode = prop; + modeURI = NULL; + } +#ifdef DEBUG_PROCESS + xsltGenericDebug(xsltGenericDebugContext, + "xsltApplyTemplates: mode %s\n", mode); +#endif + } else { + mode = NULL; + modeURI = NULL; + } + ctxt->mode = mode; + ctxt->modeURI = modeURI; + prop = xmlGetNsProp(inst, (const xmlChar *)"select", XSLT_NAMESPACE); if (prop != NULL) { #ifdef DEBUG_PROCESS @@ -1262,7 +1308,6 @@ xsltApplyTemplates(xsltTransformContextPtr ctxt, xmlNodePtr node, if (xpathParserCtxt == NULL) goto error; ctxt->xpathCtxt->node = node; - valuePush(xpathParserCtxt, xmlXPathNewNodeSet(node)); xmlXPathEvalExpr(xpathParserCtxt); res = valuePop(xpathParserCtxt); do { @@ -1369,6 +1414,8 @@ xsltApplyTemplates(xsltTransformContextPtr ctxt, xmlNodePtr node, ctxt->xpathCtxt->proximityPosition = oldProximityPosition; error: + ctxt->mode = oldmode; + ctxt->modeURI = oldmodeURI; if (xpathParserCtxt != NULL) xmlXPathFreeParserContext(xpathParserCtxt); if (prop != NULL) @@ -1377,6 +1424,10 @@ error: xmlXPathFreeObject(res); if (list != NULL) xmlXPathFreeNodeSet(list); + if (mode != NULL) + xmlFree(mode); + if (modeURI != NULL) + xmlFree(modeURI); } @@ -1476,11 +1527,13 @@ xsltApplyOneTemplate(xsltTransformContextPtr ctxt, xmlNodePtr node, } xsltParseStylesheetParam(ctxt, cur); } else if (IS_XSLT_NAME(cur, "call-template")) { + ctxt->insert = insert; if (has_variables == 0) { 
xsltPushStack(ctxt); has_variables = 1; } xsltCallTemplate(ctxt, node, cur); + ctxt->insert = oldInsert; } else if (IS_XSLT_NAME(cur, "message")) { xsltMessage(ctxt, node, cur); } else { @@ -1621,7 +1674,6 @@ xsltChoose(xsltTransformContextPtr ctxt, xmlNodePtr node, if (xpathParserCtxt == NULL) goto error; ctxt->xpathCtxt->node = node; - valuePush(xpathParserCtxt, xmlXPathNewNodeSet(node)); xmlXPathEvalExpr(xpathParserCtxt); xmlXPathBooleanFunction(xpathParserCtxt, 1); res = valuePop(xpathParserCtxt); @@ -1721,7 +1773,6 @@ xsltIf(xsltTransformContextPtr ctxt, xmlNodePtr node, if (xpathParserCtxt == NULL) goto error; ctxt->xpathCtxt->node = node; - valuePush(xpathParserCtxt, xmlXPathNewNodeSet(node)); xmlXPathEvalExpr(xpathParserCtxt); xmlXPathBooleanFunction(xpathParserCtxt, 1); res = valuePop(xpathParserCtxt); @@ -1749,7 +1800,7 @@ xsltIf(xsltTransformContextPtr ctxt, xmlNodePtr node, "xsltIf: test evaluate to %d\n", doit); #endif if (doit) { - xsltApplyOneTemplate(ctxt, ctxt->node, inst->children); + xsltApplyOneTemplate(ctxt, node, inst->children); } error: @@ -1797,7 +1848,6 @@ xsltForEach(xsltTransformContextPtr ctxt, xmlNodePtr node, if (xpathParserCtxt == NULL) goto error; ctxt->xpathCtxt->node = node; - valuePush(xpathParserCtxt, xmlXPathNewNodeSet(node)); xmlXPathEvalExpr(xpathParserCtxt); res = valuePop(xpathParserCtxt); do { diff --git a/libxslt/variables.c b/libxslt/variables.c index c3aab76..0b0024e 100644 --- a/libxslt/variables.c +++ b/libxslt/variables.c @@ -501,7 +501,7 @@ xmlXPathObjectPtr xsltGlobalVariableLookup(xsltTransformContextPtr ctxt, const xmlChar *name, const xmlChar *ns_uri) { xsltStylesheetPtr style; - xsltStackElemPtr elem; + xsltStackElemPtr elem = NULL; style = ctxt->style; /* TODO: handle the stylesheet cascade */ diff --git a/libxslt/xslt.c b/libxslt/xslt.c index a473e03..363a175 100644 --- a/libxslt/xslt.c +++ b/libxslt/xslt.c @@ -31,6 +31,7 @@ #include "imports.h" #define DEBUG_PARSING +/* #define DEBUG_BLANKS */ /* * Useful 
macros @@ -694,7 +695,7 @@ xsltParseRemoveBlanks(xsltStylesheetPtr style) { delete = NULL; while (cur != NULL) { if (delete != NULL) { -#ifdef DEBUG_PARSING +#ifdef DEBUG_BLANKS xsltGenericDebug(xsltGenericDebugContext, "xsltParseRemoveBlanks: removing ignorable blank node\n"); #endif @@ -780,9 +781,9 @@ xsltParseTemplateContent(xsltStylesheetPtr style, xsltTemplatePtr ret, delete = NULL; while (cur != NULL) { if (delete != NULL) { -#ifdef DEBUG_PARSING +#ifdef DEBUG_BLANKS xsltGenericDebug(xsltGenericDebugContext, - "xsltParseStylesheetTemplate: removing text\n"); + "xsltParseTemplateContent: removing text\n"); #endif xmlUnlinkNode(delete); xmlFreeNode(delete); @@ -803,6 +804,10 @@ xsltParseTemplateContent(xsltStylesheetPtr style, xsltTemplatePtr ret, (const xmlChar *)"disable-output-escaping", XSLT_NAMESPACE); if (prop != NULL) { +#ifdef DEBUG_PARSING + xsltGenericDebug(xsltGenericDebugContext, + "Disable escaping: %s\n", text->content); +#endif if (xmlStrEqual(prop, (const xmlChar *)"yes")) { text->name = xmlStringTextNoenc; } else if (!xmlStrEqual(prop, @@ -904,6 +909,8 @@ void xsltParseStylesheetTemplate(xsltStylesheetPtr style, xmlNodePtr template) { xsltTemplatePtr ret; xmlChar *prop; + xmlChar *mode; + xmlChar *modeURI; if (template == NULL) return; @@ -920,6 +927,43 @@ xsltParseStylesheetTemplate(xsltStylesheetPtr style, xmlNodePtr template) { /* * Get arguments */ + prop = xmlGetNsProp(template, (const xmlChar *)"mode", XSLT_NAMESPACE); + if (prop != NULL) { + xmlChar *prefix = NULL; + + mode = xmlSplitQName2(prop, &prefix); + if (mode != NULL) { + if (prefix != NULL) { + xmlNsPtr ns; + + ns = xmlSearchNs(template->doc, template, prefix); + if (ns == NULL) { + xsltGenericError(xsltGenericErrorContext, + "no namespace bound to prefix %s\n", prefix); + xmlFree(prefix); + xmlFree(mode); + mode = prop; + } else { + modeURI = xmlStrdup(ns->href); + xmlFree(prefix); + xmlFree(prop); + } + } else { + xmlFree(prop); + modeURI = NULL; + } + } else { + mode = prop; 
+ modeURI = NULL; + } +#ifdef DEBUG_PARSING + xsltGenericDebug(xsltGenericDebugContext, + "xslt:template: mode %s\n", mode); +#endif + } else { + mode = NULL; + modeURI = NULL; + } prop = xmlGetNsProp(template, (const xmlChar *)"match", XSLT_NAMESPACE); if (prop != NULL) { if (ret->match != NULL) xmlFree(ret->match); @@ -967,7 +1011,12 @@ xsltParseStylesheetTemplate(xsltStylesheetPtr style, xmlNodePtr template) { * parse the content and register the pattern */ xsltParseTemplateContent(style, ret, template); - xsltAddTemplate(style, ret); + xsltAddTemplate(style, ret, mode, modeURI); + + if (mode != NULL) + xmlFree(mode); + if (modeURI != NULL) + xmlFree(modeURI); } /** @@ -1160,7 +1209,7 @@ xsltParseStylesheetProcess(xsltStylesheetPtr ret, xmlDocPtr doc) { * parse the content and register the pattern */ xsltParseTemplateContent(ret, template, (xmlNodePtr) doc); - xsltAddTemplate(ret, template); + xsltAddTemplate(ret, template, NULL, NULL); } return(ret); diff --git a/libxslt/xsltInternals.h b/libxslt/xsltInternals.h index 8228219..1cd6ce5 100644 --- a/libxslt/xsltInternals.h +++ b/libxslt/xsltInternals.h @@ -173,6 +173,9 @@ struct _xsltTransformContext { xsltStylesheetPtr style; /* the stylesheet used */ xsltOutputType type; /* the type of output */ + const xmlChar *mode; /* the current mode */ + const xmlChar *modeURI; /* the current mode URI */ + xmlDocPtr doc; /* the current doc */ xmlNodePtr node; /* the current node */ xmlNodeSetPtr nodeList; /* the current node list */ diff --git a/libxslt/xsltproc.c b/libxslt/xsltproc.c index abd928e..a5b0aec 100644 --- a/libxslt/xsltproc.c +++ b/libxslt/xsltproc.c @@ -7,6 +7,8 @@ */ #include +#include +#include #include #include #include @@ -16,14 +18,18 @@ #include #include +extern int xmlLoadExtDtdDefaultValue; + static int debug = 0; static int repeat = 0; +static int timing = 0; int main(int argc, char **argv) { int i; xsltStylesheetPtr cur = NULL; xmlDocPtr doc, res; + struct timeval begin, end; /* --repeat : repeat 
100 times, for timing or profiling */ LIBXML_TEST_VERSION @@ -37,12 +43,27 @@ main(int argc, char **argv) { } else if ((!strcmp(argv[i], "-repeat")) || (!strcmp(argv[i], "--repeat"))) { repeat++; + } else if ((!strcmp(argv[i], "-timing")) || + (!strcmp(argv[i], "--timing"))) { + timing++; } } xmlSubstituteEntitiesDefault(1); + xmlLoadExtDtdDefaultValue = 1; for (i = 1; i < argc ; i++) { if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0)) { + if (timing) + gettimeofday(&begin, NULL); cur = xsltParseStylesheetFile((const xmlChar *)argv[i]); + if (timing) { + long msec; + gettimeofday(&end, NULL); + msec = end.tv_sec - begin.tv_sec; + msec *= 1000; + msec += (end.tv_usec - begin.tv_usec) / 1000; + fprintf(stderr, "Parsing stylesheet %s took %ld ms\n", + argv[i], msec); + } if (cur != NULL) { if (cur->indent == 1) xmlIndentTreeOutput = 1; @@ -56,11 +77,22 @@ main(int argc, char **argv) { } if (cur != NULL) { for (;i < argc ; i++) { + if (timing) + gettimeofday(&begin, NULL); doc = xmlParseFile(argv[i]); if (doc == NULL) { fprintf(stderr, "unable to parse %s\n", argv[i]); continue; } + if (timing) { + long msec; + gettimeofday(&end, NULL); + msec = end.tv_sec - begin.tv_sec; + msec *= 1000; + msec += (end.tv_usec - begin.tv_usec) / 1000; + fprintf(stderr, "Parsing document %s took %ld ms\n", + argv[i], msec); + } if (repeat) { int j; for (j = 0;j < 99; j++) { @@ -70,19 +102,41 @@ main(int argc, char **argv) { doc = xmlParseFile(argv[i]); } } + if (timing) + gettimeofday(&begin, NULL); res = xsltApplyStylesheet(cur, doc); + if (timing) { + long msec; + gettimeofday(&end, NULL); + msec = end.tv_sec - begin.tv_sec; + msec *= 1000; + msec += (end.tv_usec - begin.tv_usec) / 1000; + fprintf(stderr, "Applying stylesheet took %ld ms\n", + msec); + } xmlFreeDoc(doc); if (res == NULL) { fprintf(stderr, "no result for %s\n", argv[i]); continue; } if (cur->methodURI == NULL) { + if (timing) + gettimeofday(&begin, NULL); #ifdef LIBXML_DEBUG_ENABLED if (debug) 
xmlDebugDumpDocument(stdout, res); else #endif xsltSaveResultToFile(stdout, res, cur); + if (timing) { + long msec; + gettimeofday(&end, NULL); + msec = end.tv_sec - begin.tv_sec; + msec *= 1000; + msec += (end.tv_usec - begin.tv_usec) / 1000; + fprintf(stderr, "Saving result took %ld ms\n", + msec); + } } xmlFreeDoc(res); diff --git a/libxslt/xsltutils.c b/libxslt/xsltutils.c index f506071..0ae7caa 100644 --- a/libxslt/xsltutils.c +++ b/libxslt/xsltutils.c @@ -375,7 +375,17 @@ xsltSaveResultToFilename(const char *URL, xmlDocPtr result, if ((URL == NULL) || (result == NULL) || (style == NULL)) return(-1); - buf = xmlOutputBufferCreateFilename(URL, NULL, compression); + if (style->encoding != NULL) { + xmlCharEncodingHandlerPtr encoder; + + encoder = xmlFindCharEncodingHandler((char *)style->encoding); + if ((encoder != NULL) && + (xmlStrEqual(encoder->name, (const xmlChar *) "UTF-8"))) + encoder = NULL; + buf = xmlOutputBufferCreateFilename(URL, encoder, compression); + } else { + buf = xmlOutputBufferCreateFilename(URL, NULL, compression); + } if (buf == NULL) return(-1); xsltSaveResultTo(buf, result, style); @@ -403,7 +413,18 @@ xsltSaveResultToFile(FILE *file, xmlDocPtr result, xsltStylesheetPtr style) { if ((file == NULL) || (result == NULL) || (style == NULL)) return(-1); - buf = xmlOutputBufferCreateFile(file, NULL); + if (style->encoding != NULL) { + xmlCharEncodingHandlerPtr encoder; + + encoder = xmlFindCharEncodingHandler((char *)style->encoding); + if ((encoder != NULL) && + (xmlStrEqual(encoder->name, (const xmlChar *) "UTF-8"))) + encoder = NULL; + buf = xmlOutputBufferCreateFile(file, encoder); + } else { + buf = xmlOutputBufferCreateFile(file, NULL); + } + if (buf == NULL) return(-1); xsltSaveResultTo(buf, result, style); @@ -431,7 +452,17 @@ xsltSaveResultToFd(int fd, xmlDocPtr result, xsltStylesheetPtr style) { if ((fd < 0) || (result == NULL) || (style == NULL)) return(-1); - buf = xmlOutputBufferCreateFd(fd, NULL); + if (style->encoding != NULL) 
{ + xmlCharEncodingHandlerPtr encoder; + + encoder = xmlFindCharEncodingHandler((char *)style->encoding); + if ((encoder != NULL) && + (xmlStrEqual(encoder->name, (const xmlChar *) "UTF-8"))) + encoder = NULL; + buf = xmlOutputBufferCreateFd(fd, encoder); + } else { + buf = xmlOutputBufferCreateFd(fd, NULL); + } if (buf == NULL) return(-1); xsltSaveResultTo(buf, result, style); diff --git a/tests/Makefile.am b/tests/Makefile.am index 7ca514a..59bbcc6 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,5 +1,5 @@ ## Process this file with automake to produce Makefile.in -SUBDIRS=docs REC1 REC2 REC general numbers +SUBDIRS=docs REC1 REC2 REC general numbers xmlspec test tests: all diff --git a/tests/xmlspec/Makefile.am b/tests/xmlspec/Makefile.am new file mode 100644 index 0000000..e3d10f1 --- /dev/null +++ b/tests/xmlspec/Makefile.am @@ -0,0 +1,18 @@ +## Process this file with automake to produce Makefile.in + +$(top_builddir)/libxslt/xsltproc: + @(cd ../../libxslt ; make xsltproc) + +EXTRA_DIST = REC-xml-20001006.xml xmlspec-v21.dtd W3C-REC.css \ + logo-REC xmlspec.xsl + +all: test + +test tests: $(top_builddir)/libxslt/xsltproc + @(rm -f .memdump ; touch .memdump) + @($(top_builddir)/libxslt/xsltproc -timing -v xmlspec.xsl REC-xml-20001006.xml > REC-xml-20001006.html 2> debug ; \ + grep implemented debug | sort | uniq -c ; \ + grep "ms$$" debug ; \ + grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\ + rm -f doc.res) + diff --git a/tests/xmlspec/REC-xml-20001006.xml b/tests/xmlspec/REC-xml-20001006.xml new file mode 100644 index 0000000..96a2ee8 --- /dev/null +++ b/tests/xmlspec/REC-xml-20001006.xml @@ -0,0 +1,3277 @@ + + + + + + + + + + + + +'"> + + + + + + + + +amp, +lt, +gt, +apos, +quot"> + + +]> + + +
+Extensible Markup Language (XML) +1.0 (Second Edition) +REC-xml-&iso6.doc.date; +W3C Recommendation +&draft.day;&draft.month;&draft.year; + +&http-ident;-&iso6.doc.date; +(XHTML, XML, PDF, XHTML +review version with color-coded revision indicators) +http://www.w3.org/TR/REC-xml + http://www.w3.org/TR/2000/WD-xml-2e-20000814 + http://www.w3.org/TR/1998/REC-xml-19980210 + +Tim BrayTextuality and Netscape +tbray@textuality.com +Jean PaoliMicrosoft +jeanpa@microsoft.com +C. M. Sperberg-McQueenUniversity +of Illinois at Chicago and Text Encoding Initiativecmsmcq@uic.edu + +Eve MalerSun Microsystems, +Inc.eve.maler@east.sun.com + + + +

The Extensible Markup Language (XML) is a subset of SGML that is completely +described in this document. Its goal is to enable generic SGML to be served, +received, and processed on the Web in the way that is now possible with HTML. +XML has been designed for ease of implementation and for interoperability +with both SGML and HTML.

+
+ +

This document has been reviewed by W3C Members and other interested parties +and has been endorsed by the Director as a W3C Recommendation. It is a stable +document and may be used as reference material or cited as a normative reference +from another document. W3C's role in making the Recommendation is to draw +attention to the specification and to promote its widespread deployment. This +enhances the functionality and interoperability of the Web.

+

This document specifies a syntax created by subsetting an existing, widely +used international text processing standard (Standard Generalized Markup Language, +ISO 8879:1986(E) as amended and corrected) for use on the World Wide Web. +It is a product of the W3C XML Activity, details of which can be found at http://www.w3.org/XML. [E100] +The English version of this specification is the only normative version. However, +for translations of this document, see http://www.w3.org/XML/#trans. A +list of current W3C Recommendations and other technical documents can be found +at http://www.w3.org/TR.

+

[E66]This +specification uses the term URI, which is defined by [Berners-Lee et al.], +a work in progress expected to update [IETF RFC1738] and [IETF RFC1808].

+

This second edition is not a new version of XML (first published 10 February 1998); +it merely incorporates the changes dictated by the first-edition errata (available +at http://www.w3.org/XML/xml-19980210-errata) +as a convenience to readers. The errata list for this second edition is available +at http://www.w3.org/XML/xml-V10-2e-errata.

+

Please report errors in this document to xml-editor@w3.org[E101]; archives are available.

+ +

C. M. Sperberg-McQueen's affiliation has changed since the publication +of the first edition. He is now at the World Wide Web Consortium, and can +be contacted at cmsmcq@w3.org.

+
+
+ +

Chicago, Vancouver, Mountain View, et al.: World-Wide Web Consortium, XML +Working Group, 1996, 1997, 2000.

+
+ +

Created in electronic form.

+
+ +English +Extended Backus-Naur Form (formal grammar) + + + +1997-12-03 : CMSMcQ : yet further changes +1997-12-02 : TB : further changes (see TB to XML WG, 2 December 1997) +1997-12-02 : CMSMcQ : deal with as many corrections and comments from +the proofreaders as possible: entify hard-coded document date in pubdate element, +change expansion of entity WebSGML, update status description as per Dan Connolly +(am not sure about refernece to Berners-Lee et al.), add 'The' to abstract +as per WG decision, move Relationship to Existing Standards to back matter +and combine with References, re-order back matter so normative appendices +come first, re-tag back matter so informative appendices are tagged informdiv1, +remove XXX XXX from list of 'normative' specs in prose, move some references +from Other References to Normative References, add RFC 1738, 1808, and 2141 +to Other References (they are not normative since we do not require the processor +to enforce any rules based on them), add reference to 'Fielding draft' (Berners-Lee +et al.), move notation section to end of body, drop URIchar non-terminal and +use SkipLit instead, lose stray reference to defunct nonterminal 'markupdecls', +move reference to Aho et al. into appendix (Tim's right), add prose note saying +that hash marks and fragment identifiers are NOT part of the URI formally +speaking, and are NOT legal in system identifiers (processor 'may' signal +an error). Work through: Tim Bray reacting to James Clark, Tim Bray on his +own, Eve Maler, NOT DONE YET: change binary / text to unparsed / parsed. 
handle +James's suggestion about < in attriubte values uppercase hex characters, +namechar list, +1997-12-01 : JB : add some column-width parameters +1997-12-01 : CMSMcQ : begin round of changes to incorporate recent +WG decisions and other corrections: binding sources of character encoding +info (27 Aug / 3 Sept), correct wording of Faust quotation (restore dropped +line), drop SDD from EncodingDecl, change text at version number 1.0, drop +misleading (wrong!) sentence about ignorables and extenders, modify definition +of PCData to make bar on msc grammatical, change grammar's handling of internal +subset (drop non-terminal markupdecls), change definition of includeSect to +allow conditional sections, add integral-declaration constraint on internal +subset, drop misleading / dangerous sentence about relationship of entities +with system storage objects, change table body tag to htbody as per EM change +to DTD, add rule about space normalization in public identifiers, add description +of how to generate our name-space rules from Unicode character database (needs +further work!). +1997-10-08 : TB : Removed %-constructs again, new rules for PE appearance. +1997-10-01 : TB : Case-sensitive markup; cleaned up element-type defs, +lotsa little edits for style +1997-09-25 : TB : Change to elm's new DTD, with substantial detail +cleanup as a side-effect +1997-07-24 : CMSMcQ : correct error (lost *) in definition of ignoreSectContents +(thanks to Makoto Murata) +Allow all empty elements to have end-tags, consistent with SGML TC +(as per JJC). +1997-07-23 : CMSMcQ : pre-emptive strike on pending corrections: introduce +the term 'empty-element tag', note that all empty elements may use it, and +elements declared EMPTY must use it. Add WFC requiring encoding decl to come +first in an entity. Redefine notations to point to PIs as well as binary entities. +Change autodetection table by removing bytes 3 and 4 from examples with Byte +Order Mark. 
Add content model as a term and clarify that it applies to both +mixed and element content. +1997-06-30 : CMSMcQ : change date, some cosmetic changes, changes to +productions for choice, seq, Mixed, NotationType, Enumeration. Follow James +Clark's suggestion and prohibit conditional sections in internal subset. TO +DO: simplify production for ignored sections as a result, since we don't need +to worry about parsers which don't expand PErefs finding a conditional section. +1997-06-29 : TB : various edits +1997-06-29 : CMSMcQ : further changes: Suppress old FINAL EDIT comments +and some dead material. Revise occurrences of % in grammar to exploit Henry +Thompson's pun, especially markupdecl and attdef. Remove RMD requirement relating +to element content (?). +1997-06-28 : CMSMcQ : Various changes for 1 July draft: Add text for +draconian error handling (introduce the term Fatal Error). RE deleta est (changing +wording from original announcement to restrict the requirement to validating +parsers). Tag definition of validating processor and link to it. Add colon +as name character. Change def of %operator. Change standard definitions of +lt, gt, amp. Strip leading zeros from #x00nn forms. +1997-04-02 : CMSMcQ : final corrections of editorial errors found in +last night's proofreading. Reverse course once more on well-formed: Webster's +Second hyphenates it, and that's enough for me. +1997-04-01 : CMSMcQ : corrections from JJC, EM, HT, and self +1997-03-31 : Tim Bray : many changes +1997-03-29 : CMSMcQ : some Henry Thompson (on entity handling), some +Charles Goldfarb, some ERB decisions (PE handling in miscellaneous declarations. +Changed Ident element to accept def attribute. Allow normalization of Unicode +characters. move def of systemliteral into section on literals. +1997-03-28 : CMSMcQ : make as many corrections as possible, from Terry +Allen, Norbert Mikula, James Clark, Jon Bosak, Henry Thompson, Paul Grosso, +and self. 
Among other things: give in on "well formed" (Terry is right), tentatively +rename QuotedCData as AttValue and Literal as EntityValue to be more informative, +since attribute values are the only place QuotedCData was used, +and vice versa for entity text and Literal. (I'd call it Entity Text, but +8879 uses that name for both internal and external entities.) +1997-03-26 : CMSMcQ : resynch the two forks of this draft, reapply +my changes dated 03-20 and 03-21. Normalize old 'may not' to 'must not' except +in the one case where it meant 'may or may not'. +1997-03-21 : TB : massive changes on plane flight from Chicago to Vancouver +1997-03-21 : CMSMcQ : correct as many reported errors as possible. +1997-03-20 : CMSMcQ : correct typos listed in CMSMcQ hand copy of spec. +1997-03-20 : CMSMcQ : cosmetic changes preparatory to revision for +WWW conference April 1997: restore some of the internal entity references +(e.g. to docdate, etc.), change character xA0 to &nbsp; and define nbsp +as &#160;, and refill a lot of paragraphs for legibility. +1996-11-12 : CMSMcQ : revise using Tim's edits: Add list type of NUMBERED +and change most lists either to BULLETS or to NUMBERED. Suppress QuotedNames, +Names (not used). Correct trivial-grammar doc type decl. Rename 'marked section' +as 'CDATA section' passim. Also edits from James Clark: Define the set of +characters from which [^abc] subtracts. Charref should use just [0-9] not +Digit. Location info needs cleaner treatment: remove? (ERB question). One +example of a PI has wrong pic. Clarify discussion of encoding names. Encoding +failure should lead to unspecified results; don't prescribe error recovery. +Don't require exposure of entity boundaries. Ignore white space in element +content. Reserve entity names of the form u-NNNN. Clarify relative URLs. And +some of my own: Correct productions for content model: model cannot consist +of a name, so "elements ::= cp" is no good. +1996-11-11 : CMSMcQ : revise for style. 
Add new rhs to entity declaration, +for parameter entities. +1996-11-10 : CMSMcQ : revise for style. Fix / complete section on names, +characters. Add sections on parameter entities, conditional sections. Still +to do: Add compatibility note on deterministic content models. Finish stylistic +revision. +1996-10-31 : TB : Add Entity Handling section +1996-10-30 : TB : Clean up term & termdef. Slip in ERB decision +re EMPTY. +1996-10-28 : TB : Change DTD. Implement some of Michael's suggestions. +Change comments back to //. Introduce language for XML namespace reservation. +Add section on white-space handling. Lots more cleanup. +1996-10-24 : CMSMcQ : quick tweaks, implement some ERB decisions. Characters +are not integers. Comments are /* */ not //. Add bibliographic refs to 10646, +HyTime, Unicode. Rename old Cdata as MsData since it's only seen +in marked sections. Call them attribute-value pairs not name-value pairs, +except once. Internal subset is optional, needs '?'. Implied attributes should +be signaled to the app, not have values supplied by processor. +1996-10-16 : TB : track down & excise all DSD references; introduce +some EBNF for entity declarations. +1996-10-?? : TB : consistency check, fix up scraps so they all parse, +get formatter working, correct a few productions. +1996-10-10/11 : CMSMcQ : various maintenance, stylistic, and organizational +changes: Replace a few literals with xmlpio and pic entities, to make them +consistent and ensure we can change pic reliably when the ERB votes. Drop +paragraph on recognizers from notation section. Add match, exact match to +terminology. Move old 2.2 XML Processors and Apps into intro. Mention comments, +PIs, and marked sections in discussion of delimiter escaping. Streamline discussion +of doctype decl syntax. Drop old section of 'PI syntax' for doctype decl, +and add section on partial-DTD summary PIs to end of Logical Structures section. +Revise DSD syntax section to use Tim's subset-in-a-PI mechanism. 
+1996-10-10 : TB : eliminate name recognizers (and more?) +1996-10-09 : CMSMcQ : revise for style, consistency through 2.3 (Characters) +1996-10-09 : CMSMcQ : re-unite everything for convenience, at least +temporarily, and revise quickly +1996-10-08 : TB : first major homogenization pass +1996-10-08 : TB : turn "current" attribute on div type into CDATA +1996-10-02 : TB : remould into skeleton + entities +1996-09-30 : CMSMcQ : add a few more sections prior to exchange with +Tim. +1996-09-20 : CMSMcQ : finish transcribing notes. +1996-09-19 : CMSMcQ : begin transcribing notes for draft. +1996-09-13 : CMSMcQ : made outline from notes of 09-06, do some housekeeping + + +
+ + +Introduction +

Extensible Markup Language, abbreviated XML, describes a class of data +objects called XML documents and partially +describes the behavior of computer programs which process them. XML is an +application profile or restricted form of SGML, the Standard Generalized Markup +Language [ISO 8879]. By construction, XML documents are conforming +SGML documents.

+

XML documents are made up of storage units called entities, +which contain either parsed or unparsed data. Parsed data is made up of characters, some of which form character +data, and some of which form markup. +Markup encodes a description of the document's storage layout and logical +structure. XML provides a mechanism to impose constraints on the storage layout +and logical structure.

+

A software module called +an XML processor is used to read XML documents and provide access +to their content and structure. It +is assumed that an XML processor is doing its work on behalf of another module, +called the application. This specification describes +the required behavior of an XML processor in terms of how it must read XML +data and the information it must provide to the application.

+ +Origin and Goals +

XML was developed by an XML Working Group (originally known as the SGML +Editorial Review Board) formed under the auspices of the World Wide Web Consortium +(W3C) in 1996. It was chaired by Jon Bosak of Sun Microsystems with the active +participation of an XML Special Interest Group (previously known as the SGML +Working Group) also organized by the W3C. The membership of the XML Working +Group is given in an appendix. Dan Connolly served as the WG's contact with +the W3C.

+

The design goals for XML are:

+ +

XML shall be straightforwardly usable over the Internet.

+

XML shall support a wide variety of applications.

+

XML shall be compatible with SGML.

+

It shall be easy to write programs which process XML documents.

+
+

The number of optional features in XML is to be kept to the absolute +minimum, ideally zero.

+

XML documents should be human-legible and reasonably clear.

+

The XML design should be prepared quickly.

+

The design of XML shall be formal and concise.

+

XML documents shall be easy to create.

+

Terseness in XML markup is of minimal importance.

+
+

This specification, together with associated standards (Unicode and ISO/IEC +10646 for characters, Internet RFC 1766 for language identification tags, +ISO 639 for language name codes, and ISO 3166 for country name codes), provides +all the information necessary to understand XML Version &versionOfXML; and +construct computer programs to process it.

+

This version of the XML specification &doc.distribution;.

+
+ +Terminology +

The terminology used to describe XML documents is defined in the body of +this specification. The terms defined in the following list are used in building +those definitions and in describing the actions of an XML processor: + + +

Conforming documents and XML processors +are permitted to but need not behave as described.

+ + + +

Conforming documents and XML processors +are required to behave as described; otherwise they are in error.

+
+ + +

A violation of the rules of this specification; +results are undefined. Conforming software may detect and report an error +and may recover from it.

+
+ + +

An error which a conforming XML processor must detect and report to the application. +After encountering a fatal error, the processor may continue processing the +data to search for further errors and may report such errors to the application. +In order to support correction of errors, the processor may make unprocessed +data from the document (with intermingled character data and markup) available +to the application. Once a fatal error is detected, however, the processor +must not continue normal processing (i.e., it must not continue to pass character +data and information about the document's logical structure to the application +in the normal way).

+
+ + +

Conforming software +may or must (depending on the modal verb in the sentence) behave as described; +if it does, it must provide users a means to enable or disable the behavior +described.

+
+ + +

A rule which applies to +all valid XML documents. Violations of validity +constraints are errors; they must, at user option, be reported by validating XML processors.

+
+ + +

A rule which applies +to all well-formed XML documents. Violations +of well-formedness constraints are fatal errors.

+
+ + +

(Of strings or names:) Two strings +or names being compared must be identical. Characters with multiple possible +representations in ISO/IEC 10646 (e.g. characters with both precomposed and +base+diacritic forms) match only if they have the same representation in both +strings. [E85]At +user option, processors may normalize such characters to some canonical form. No +case folding is performed. (Of strings and rules in the grammar:) A string +matches a grammatical production if it belongs to the language generated by +that production. (Of content and content models:) An element matches its declaration +when it conforms in the fashion described in the constraint .

+
+ + +

[E87]Marks +a sentence describing a feature of XML included solely to ensure +that XML remains compatible with SGML.

+
+ + +

[E87]Marks +a sentence describing a non-binding recommendation included to increase +the chances that XML documents can be processed by the existing installed +base of SGML processors which predate the &WebSGML;.

+
+

+
+
+ + +Documents +

A data object is an XML +document if it is well-formed, +as defined in this specification. A well-formed XML document may in addition +be valid if it meets certain further constraints.

+

Each XML document has both a logical and a physical structure. Physically, +the document is composed of units called entities. +An entity may refer to other entities to +cause their inclusion in the document. A document begins in a root +or document entity. Logically, the document +is composed of declarations, elements, comments, character references, and +processing instructions, all of which are indicated in the document by explicit +markup. The logical and physical structures must nest properly, as described +in section 4.3.2, Well-Formed Parsed Entities.

+ +Well-Formed XML Documents +

A textual object is a well-formed +XML document if:

+ +

Taken as a whole, it matches the production labeled document.

+
+

It meets all the well-formedness constraints given in this specification.

+
+

Each of the parsed entities +which is referenced directly or indirectly within the document is well-formed.

+
+ +Document + +documentprolog element Misc* + + +

Matching the document production implies that:

+ +

It contains one or more elements.

+
+ +

There is exactly one element, +called the root, or document element, no part of which appears +in the content of any other element. [E17]For +all other elements, if the start-tag is in +the content of another element, the end-tag +is in the content of the same element. More simply stated, the elements, +delimited by start- and end-tags, nest properly within each other.

+
+

As a consequence of this, +for each non-root element C in the document, there is one other element P +in the document such that C is in the content of P, but +is not in the content of any other element that is in the content of P. P +is referred to as the parent of C, and C as +a child of P.

+
+ +Characters +

A parsed entity contains text, +a sequence of characters, which may +represent markup or character data. A character +is an atomic unit of text as specified by ISO/IEC 10646 [E67](see +also ). Legal characters are tab, carriage +return, line feed, and the legal [E35]graphic characters +of Unicode and ISO/IEC 10646. [E69]The +versions of these standards cited in were +current at the time this document was prepared. New characters may be added +to these standards by amendments or new editions. Consequently, XML processors +must accept any character in the range specified for Char. +The use of compatibility characters, as defined in section +6.8 of [E67](see +also D21 in section 3.6 of ), is discouraged.

+ +Character Range + + +Char#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] +any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + + +

The mechanism for encoding character code points into bit patterns may +vary from entity to entity. All XML processors must accept the UTF-8 and UTF-16 +encodings of 10646; the mechanisms for signaling which of the two is in use, +or for bringing other encodings into play, are discussed later, in section 4.3.3, Character Encoding in Entities.

+ +
+ +Common Syntactic Constructs +

This section defines some symbols used widely in the grammar.

+

S (white space) consists of one or more space (#x20) +characters, carriage returns, line feeds, or tabs.

+ +White Space + + +S(#x20 | #x9 | #xD | #xA)+ + + +

Characters are classified for convenience as letters, digits, or other +characters. [E30]A +letter consists of an alphabetic or syllabic base character or an ideographic +character. Full definitions of the specific characters in each class +are given in Appendix B, Character Classes.

+

A Name is a token beginning +with a letter or one of a few punctuation characters, and continuing with +letters, digits, hyphens, underscores, colons, or full stops, together known +as name characters. Names beginning with the string xml, +or any string which would match (('X'|'x') ('M'|'m') ('L'|'l')), +are reserved for standardization in this or future versions of this specification.

+ +

[E98]The +Namespaces in XML Recommendation assigns a meaning +to names containing colon characters. Therefore, authors should not use the +colon in XML names except for namespace purposes, but XML processors must +accept the colon as a name character.

+
+

An Nmtoken (name token) is any mixture of name +characters.

+ +Names and Tokens + +NameCharLetter | Digit +| '.' | '-' | '_' | ':' | CombiningChar | Extender + + +Name(Letter | '_' | ':') (NameChar)* + + +NamesName (S Name)* + + +Nmtoken(NameChar)+ + + +NmtokensNmtoken (S Nmtoken)* + + +

Literal data is any quoted string not containing the quotation mark used +as a delimiter for that string. Literals are used for specifying the content +of internal entities (EntityValue), the values +of attributes (AttValue), and external identifiers +(SystemLiteral). Note that a SystemLiteral +can be parsed without scanning for markup.

+ +Literals + +EntityValue'"' ([^%&"] | PEReference +| Reference)* '"' +|  "'" ([^%&'] | PEReference | Reference)* "'" + + +AttValue'"' ([^<&"] | Reference)* +'"' +|  "'" ([^<&'] | Reference)* +"'" + + +SystemLiteral('"' [^"]* '"') | ("'" [^']* "'") + + +PubidLiteral'"' PubidChar* '"' +| "'" (PubidChar - "'")* "'" + + +PubidChar#x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] + + + +

[E72]Although +the EntityValue production allows the definition +of an entity consisting of a single explicit < in the literal +(e.g., <!ENTITY mylt "<">), it is strongly advised to avoid +this practice since any reference to that entity will cause a well-formedness +error.

+
+
+ +Character Data and Markup +

Text consists of intermingled character data and markup. Markup takes the form of start-tags, end-tags, empty-element tags, entity references, character +references, comments, CDATA section delimiters, document +type declarations, processing instructions, [E89]XML declarations, text declarations, +and any white space that is at the top level of the document entity (that +is, outside the document element and not inside any other markup).

+

All text that is not markup +constitutes the character data of the document.

+

The ampersand character (&) and the left angle bracket (<) may appear +in their literal form only when used as markup delimiters, or +within a comment, a processing +instruction, or a CDATA section. [E18]They +are also legal within the literal entity value +of an internal entity declaration; see section 4.3.2, Well-Formed Parsed Entities. +If they are needed elsewhere, they must be escaped +using either numeric character references +or the strings &amp; and &lt; +respectively. The right angle bracket (>) may be represented using the string &gt;, +and must, for compatibility, be escaped +using &gt; or a character reference when it +appears in the string ]]> in content, when +that string is not marking the end of a CDATA +section.

+

In the content of elements, character data is any string of characters +which does not contain the start-delimiter of any markup. In a CDATA section, +character data is any string of characters not including the CDATA-section-close +delimiter, ]]>.

+

To allow attribute values to contain both single and double quotes, the +apostrophe or single-quote character (') may be represented as &apos;, +and the double-quote character (") as &quot;.

+ +Character Data + +CharData[^<&]* - ([^<&]* ']]>' [^<&]*) + + +
+ +Comments +

Comments may appear +anywhere in a document outside other markup; +in addition, they may appear within the document type declaration at places +allowed by the grammar. They are not part of the document's character +data; an XML processor may, but need not, make it possible for an +application to retrieve the text of comments. For +compatibility, the string -- (double-hyphen) +must not occur within comments. [E63]Parameter +entity references are not recognized within comments.

+ +Comments + +Comment'<!--' ((Char - '-') | ('-' +(Char - '-')))* '-->' + + +

An example of a comment:

+<!&como; declarations for <head> & <body> &comc;> +

[E27]Note +that the grammar does not allow a comment ending in --->. The +following example is not well-formed.

+<!-- B+, B, or B---> +
+ +Processing Instructions +

Processing instructions +(PIs) allow documents to contain instructions for applications.

+ +Processing Instructions + +PI'<?' PITarget (S +(Char* - (Char* &pic; Char*)))? &pic; + + +PITargetName - (('X' | 'x') ('M' | +'m') ('L' | 'l')) + + +

PIs are not part of the document's character +data, but must be passed through to the application. The PI begins +with a target (PITarget) used to identify the application +to which the instruction is directed. The target names XML, xml, +and so on are reserved for standardization in this or future versions of this +specification. The XML Notation mechanism +may be used for formal declaration of PI targets. [E63]Parameter +entity references are not recognized within processing instructions.

+
+ +CDATA Sections +

CDATA sections +may occur anywhere character data may occur; they are used to escape blocks +of text containing characters which would otherwise be recognized as markup. +CDATA sections begin with the string <![CDATA[ +and end with the string ]]>:

+ +CDATA Sections + +CDSectCDStart CData CDEnd + + +CDStart'<![CDATA[' + + +CData(Char* - (Char* +']]>' Char*)) + + +CDEnd']]>' + + +

Within a CDATA section, only the CDEnd string is +recognized as markup, so that left angle brackets and ampersands may occur +in their literal form; they need not (and cannot) be escaped using &lt; +and &amp;. CDATA sections cannot nest.

+

An example of a CDATA section, in which <greeting> +and </greeting> are recognized as character data, not markup:

+<![CDATA[<greeting>Hello, world!</greeting>]]> +
+ +Prolog and Document Type Declaration +

XML documents [E107]should +begin with an XML declaration which specifies the version of +XML being used. For example, the following is a complete XML document, well-formed but not valid:

+<?xml version="1.0"?> <greeting>Hello, world!</greeting> +

and so is this:

+<greeting>Hello, world!</greeting> +

The version number 1.0 should be used to indicate +conformance to this version of this specification; it is an error for a document +to use the value 1.0 if it does not conform to +this version of this specification. It is the intent of the XML working group +to give later versions of this specification numbers other than 1.0, +but this intent does not indicate a commitment to produce any future versions +of XML, nor if any are produced, to use any particular numbering scheme. Since +future versions are not ruled out, this construct is provided as a means to +allow the possibility of automatic version recognition, should it become necessary. +Processors may signal an error if they receive documents labeled with versions +they do not support.

+

The function of the markup in an XML document is to describe its storage +and logical structure and to associate attribute-value pairs with its logical +structures. XML provides a mechanism, the document +type declaration, to define constraints on the logical structure +and to support the use of predefined storage units. An XML document is valid if it has an associated +document type declaration and if the document complies with the constraints +expressed in it.

+

The document type declaration must appear before the first element +in the document.

+ +Prolog + + +prologXMLDecl? Misc* +(doctypedecl Misc*)? + + +XMLDecl&pio; VersionInfo EncodingDecl? SDDecl? S? &pic; + + +VersionInfoS 'version' Eq +("'" VersionNum "'" | '"' VersionNum +'"')[E15] + + +EqS? '=' S? + + +VersionNum([a-zA-Z0-9_.:] | '-')+ + + +MiscComment | PI +| S + + +

The XML document +type declaration contains or points to markup +declarations that provide a grammar for a class of documents. This +grammar is known as a document type definition, or DTD. The document +type declaration can point to an external subset (a special kind of external entity) containing markup declarations, +or can contain the markup declarations directly in an internal subset, or +can do both. The DTD for a document consists of both subsets taken together.

+

A markup declaration +is an element type declaration, an attribute-list declaration, an entity +declaration, or a notation declaration. +These declarations may be contained in whole or in part within parameter +entities, as described in the well-formedness and validity constraints +below. For [E14]further +information, see .

+ +Document Type Definition + + +doctypedecl'<!DOCTYPE' S Name +(S ExternalID)? S? +('[' (markupdecl | DeclSep)* +']' S?)? '>'[E109] + + +DeclSepPEReference | S +[E109] + + +markupdeclelementdecl | AttlistDecl | EntityDecl +| NotationDecl | PI | Comment + + +

[E82]Note +that it is possible to construct a well-formed document containing a doctypedecl +that neither points to an external subset nor contains an internal subset.

+

The markup declarations may be made up in whole or in part of the replacement text of parameter +entities. The productions later in this specification for individual +nonterminals (elementdecl, AttlistDecl, +and so on) describe the declarations after all the parameter +entities have been included.

+

[E75]Parameter +entity references are recognized anywhere in the DTD (internal and external +subsets and external parameter entities), except in literals, processing instructions, +comments, and the contents of ignored conditional sections (see ). +They are also recognized in entity value literals. The use of parameter entities +in the internal subset is restricted as described below.

+Root Element Type

The Name +in the document type declaration must match the element type of the root element.

+
+Proper Declaration/PE Nesting +

Parameter-entity replacement text +must be properly nested with markup declarations. That is to say, if either +the first character or the last character of a markup declaration (markupdecl +above) is contained in the replacement text for a parameter-entity +reference, both must be contained in the same replacement text.

+
+PEs in Internal Subset

In +the internal DTD subset, parameter-entity references +can occur only where markup declarations can occur, not within markup declarations. +(This does not apply to references that occur in external parameter entities +or to the external subset.)

+
+[E109]External +Subset

The external subset, if any, must match the production for extSubset.

+
+[E109]PE +Between Declarations

The replacement text of a parameter entity reference +in a DeclSep must match the production extSubsetDecl.

+
+

Like the internal subset, the external subset and any external parameter +entities [E109]referenced +in a DeclSep must consist of a series of +complete markup declarations of the types allowed by the non-terminal symbol markupdecl, interspersed with white space or parameter-entity references. However, portions of +the contents of the external subset or of [E109]these +external parameter entities may conditionally be ignored by using the conditional section construct; this is not +allowed in the internal subset.

+ +External Subset + + +extSubsetTextDecl? extSubsetDecl + + +extSubsetDecl( markupdecl | conditionalSect | DeclSep)* +[E109] + + +

The external subset and external parameter entities also differ from the +internal subset in that in them, parameter-entity +references are permitted within markup declarations, +not only between markup declarations.

+

An example of an XML document with a document type declaration:

+<?xml version="1.0"?> <!DOCTYPE greeting SYSTEM "hello.dtd"> <greeting>Hello, world!</greeting> +

The system identifier hello.dtd +gives the [E78]address +(a URI reference) of a DTD for the document.

+

The declarations can also be given locally, as in this example:

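+<?xml version="1.0" encoding="UTF-8" ?> +<!DOCTYPE greeting [ +  <!ELEMENT greeting (#PCDATA)> +]> +<greeting>Hello, world!</greeting> +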

If both the external and internal subsets are used, the internal subset +is considered to occur before the external subset. +This has the effect that entity and attribute-list declarations in the internal +subset take precedence over those in the external subset.

+
+ +Standalone Document Declaration +

Markup declarations can affect the content of the document, as passed from +an XML processor to an application; examples +are attribute defaults and entity declarations. The standalone document declaration, +which may appear as a component of the XML declaration, signals whether or +not there are such declarations which appear external to the document +entity[E64] +or in parameter entities. An external +markup declaration is defined as a markup declaration occurring in +the external subset or in a parameter entity (external or internal, the latter +being included because non-validating processors are not required to read +them).

+ +Standalone Document Declaration + + +SDDecl S 'standalone' Eq +(("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) + + +

In a standalone document declaration, the value yes indicates +that there are no [E64]external markup declarations which +affect the information passed from the XML processor to the application. The +value no indicates that there are or may be such external +markup declarations. Note that the standalone document declaration only denotes +the presence of external declarations; the presence, in a document, +of references to external entities, when those entities are internally +declared, does not change its standalone status.

+

If there are no external markup declarations, the standalone document declaration +has no meaning. If there are external markup declarations but there is no +standalone document declaration, the value no is assumed.

+

Any XML document for which standalone="no" holds can be converted +algorithmically to a standalone document, which may be desirable for some +network delivery applications.

+Standalone Document Declaration

The +standalone document declaration must have the value no if +any external markup declarations contain declarations of:

+ +

attributes with default values, +if elements to which these attributes apply appear in the document without +specifications of values for these attributes, or

+

entities (other than &magicents;), if references +to those entities appear in the document, or

+

attributes with values subject to normalization, +where the attribute appears in the document with a value which will change +as a result of normalization, or

+

element types with element content, +if white space occurs directly within any instance of those types.

+
+
+

An example XML declaration with a standalone document declaration:

+<?xml version="&versionOfXML;" standalone='yes'?> +
+ +White Space Handling +

In editing XML documents, it is often convenient to use white space +(spaces, tabs, and blank lines[E39], +denoted by the nonterminal S in this specification) +to set apart the markup for greater readability. Such white space is typically +not intended for inclusion in the delivered version of the document. On the +other hand, significant white space that should be preserved +in the delivered version is common, for example in poetry and source code.

+

An XML processor must always pass +all characters in a document that are not markup through to the application. +A validating XML processor must also +inform the application which of these characters constitute white space appearing +in element content.

+

A special attribute named xml:space +may be attached to an element to signal an intention that in that element, +white space should be preserved by applications. In valid documents, this +attribute, like any other, must be declared +if it is used. When declared, it must be given as an enumerated +type whose [E81]values +are one or both of default and preserve. +For example:

+<!ATTLIST poem xml:space (default|preserve) 'preserve'> + +<!-- [E81]--> +<!ATTLIST pre xml:space (preserve) #FIXED 'preserve'> +

The value default signals that applications' default white-space +processing modes are acceptable for this element; the value preserve +indicates the intent that applications preserve all the white space. This +declared intent is considered to apply to all elements within the content +of the element where it is specified, unless overridden with another instance +of the xml:space attribute.

+

The root element of any document is considered +to have signaled no intentions as regards application space handling, unless +it provides a value for this attribute or the attribute is declared with a +default value.

+
+ +End-of-Line Handling +

XML parsed entities are often stored +in computer files which, for editing convenience, are organized into lines. +These lines are typically separated by some combination of the characters +carriage-return (#xD) and line-feed (#xA).

+

To simplify the tasks of applications, +wherever an external parsed entity or the literal entity value of an internal +parsed entity contains either the literal two-character sequence #xD#xA +or a standalone literal #xD, an XML processor +must pass to the application the single character #xA. (This behavior can +conveniently be produced by normalizing all line breaks to #xA on input, before +parsing.)

+

[E104]To +simplify the tasks of applications, the characters +passed to an application by the XML processor +must be as if the XML processor normalized all line breaks in external parsed +entities (including the document entity) on input, before parsing, by translating +both the two-character sequence #xD #xA and any #xD that is not followed by +#xA to a single #xA character.

+
+ +Language Identification +

In document processing, it is often useful to identify the natural or formal +language in which the content is written. A special attribute +named xml:lang may be inserted in documents to specify the language +used in the contents and attribute values of any element in an XML document. +In valid documents, this attribute, like any other, must be declared +if it is used. [E73]The +values of the attribute are language identifiers as defined by [IETF RFC 1766], Tags +for the Identification of Languages, or its successor on the IETF +Standards Track.

+ +

[E73][IETF RFC 1766] tags are constructed from two-letter language codes as defined +by [ISO 639], from two-letter country codes as defined by [ISO 3166], or from language identifiers registered with the Internet +Assigned Numbers Authority [E58][IANA-LANGCODES]. It is expected that the successor +to [IETF RFC 1766] will introduce three-letter language codes for +languages not presently covered by [ISO 639].

+
+

[E73](Productions +33 through 38 have been removed.)

+ +Language Identification + +LanguageIDLangcode ('-' Subcode)* + + +LangcodeISO639Code | IanaCode +| UserCode + + +ISO639Code([a-z] | [A-Z]) ([a-z] | [A-Z]) + + +IanaCode('i' | 'I') '-' ([a-z] | [A-Z])+ + + +UserCode('x' | 'X') '-' ([a-z] | [A-Z])+ + + +Subcode([a-z] | [A-Z])+ + + +

The Langcode may be any of the following:

+ +

a two-letter language code as defined by [ISO 639], Codes +for the representation of names of languages

+

a language identifier registered with the Internet Assigned Numbers +Authority [IANA-LANGCODES]; these begin with the +prefix i- (or I-)

+
+

a language identifier assigned by the user, or agreed on between +parties in private use; these must begin with the prefix x- +or X- in order to ensure that they do not conflict +with names later standardized or registered with IANA

+
+

There may be any number of Subcode +segments; if the first subcode segment exists and the Subcode consists of +two letters, then it must be a country code from [ISO 3166], +"Codes for the representation of names of countries." If the first subcode +consists of more than two letters, it must be a subcode for the language in +question registered with IANA, unless the Langcode +begins with the prefix "x-" or "X-".

+

It is customary to give the language code in lower case, and +the country code (if any) in upper case. Note that these values, unlike other +names in XML documents, are case insensitive.

+

For example:

+The quick brown fox jumps over the lazy dog.

+

What colour is it?

+

What color is it?

+ + Habe nun, ach! Philosophie, + Juristerei, und Medizin + und leider auch Theologie + durchaus studiert mit heißem Bemüh'n. +]]>
+ +

The intent declared with xml:lang is considered to apply to +all attributes and content of the element where it is specified, unless overridden +with an instance of xml:lang on another element within that content.

+ +

A simple declaration for xml:lang might take the form

+xml:lang NMTOKEN #IMPLIED +

but specific default values may also be given, if appropriate. In a collection +of French poems for English students, with glosses and notes in English, the xml:lang +attribute might be declared this way:

+ + +]]> +
+
+ + +Logical Structures +

Each XML +document contains one or more elements, the boundaries +of which are either delimited by start-tags +and end-tags, or, for empty +elements, by an empty-element tag. Each +element has a type, identified by name, sometimes called its generic +identifier (GI), and may have a set of attribute specifications. +Each attribute specification has a name +and a value.

+ +Element + +elementEmptyElemTag +| STag content ETag + + + +

This specification does not constrain the semantics, use, or (beyond syntax) +names of the element types and attributes, except that names beginning with +a match to (('X'|'x')('M'|'m')('L'|'l')) are reserved for standardization +in this or future versions of this specification.

+Element Type Match

The Name +in an element's end-tag must match the element type in the start-tag.

+
+Element Valid

An element is valid +if there is a declaration matching elementdecl +where the Name matches the element type, and one of +the following holds:

+ +

The declaration matches EMPTY and the element has no content.

+

The declaration matches children and the +sequence of child elements belongs +to the language generated by the regular expression in the content model, +with optional white space (characters matching the nonterminal S) +between [E59]the +start-tag and the first child element, between child elements, or between +the last child element and the end-tag. Note that a CDATA section containing +only white space does not match the nonterminal S, and +hence cannot appear in these positions.

+

The declaration matches Mixed and the content +consists of character data and child elements whose types match names in the +content model.

+

The declaration matches ANY, and the types of any child elements have been declared.

+
+
+ +Start-Tags, End-Tags, and Empty-Element Tags +

The beginning of every non-empty +XML element is marked by a start-tag.

+ +Start-tag + + +STag'<' Name (S Attribute)* S? '>' + + +AttributeName Eq AttValue + + + +

The Name in the start- and end-tags gives the element's type. The Name-AttValue +pairs are referred to as the attribute specifications of the +element, with the Name in each pair referred to as the attribute name +and the content of the AttValue (the text between the ' or " +delimiters) as the attribute value.[E46]Note +that the order of attribute specifications in a start-tag or empty-element +tag is not significant.

+Unique Att Spec

No attribute name +may appear more than once in the same start-tag or empty-element tag.

+
+Attribute Value Type

The attribute must +have been declared; the value must be of the type declared for it. (For attribute +types, see the section "Attribute-List Declarations".)

+
+No External Entity References

Attribute +values cannot contain direct or indirect entity references to external entities.

+
+No < in Attribute Values +

The replacement text of any entity +referred to directly or indirectly in an attribute value [E83](other +than &lt;) must not contain a <.

+
+

An example of a start-tag:

+<termdef id="dt-dog" term="dog"> +

The end of every element that begins +with a start-tag must be marked by an end-tag containing a name +that echoes the element's type as given in the start-tag:

+ +End-tag + + +ETag'</' Name S? +'>' + + +

An example of an end-tag:

+</termdef> +

The text +between the start-tag and end-tag is called the element's content:

+ +Content of Elements + + +contentCharData? ((element +| Reference | CDSect +| PI | Comment) CharData?)* +[E71] + + +

[E97]An element +with no content is said to be empty. The representation +of an empty element is either a start-tag immediately followed by an end-tag, +or an empty-element tag. An empty-element +tag takes a special form:

+ +Tags for Empty Elements + + +EmptyElemTag'<' Name (S Attribute)* S? '/>' + + +

Empty-element tags may be used for any element which has no content, whether +or not it is declared using the keyword EMPTY. For +interoperability, the empty-element tag [E45]should +be used, and should only be used, for elements which are declared +EMPTY.

+

Examples of empty elements:

+<IMG align="left" + src="http://www.w3.org/Icons/WWW/w3c_home" /> +<br></br> +<br/> +
+ +Element Type Declarations +

The element structure of an XML document may, for validation +purposes, be constrained using element type and attribute-list declarations. +An element type declaration constrains the element's content.

+

Element type declarations often constrain which element types can appear +as children of the element. At user +option, an XML processor may issue a warning when a declaration mentions an +element type for which no declaration is provided, but this is not an error.

+

An element +type declaration takes the form:

+ +Element Type Declaration + + +elementdecl'<!ELEMENT' S Name S contentspec S? +'>' + + +contentspec'EMPTY' | 'ANY' | Mixed +| children + + +

where the Name gives the element type being declared.

+Unique Element Type Declaration

No element +type may be declared more than once.

+
+

Examples of element type declarations:

+<!ELEMENT br EMPTY> +<!ELEMENT p (#PCDATA|emph)* > +<!ELEMENT %name.para; %content.para; > +<!ELEMENT container ANY> + +Element Content +

An element type has element content when elements +of that type must contain only child +elements (no character data), optionally separated by white space (characters +matching the nonterminal S). In this case, the constraint includes a [E55]content +model, a simple grammar governing the allowed types of the +child elements and the order in which they are allowed to appear. +The grammar is built on content particles (cps), which +consist of names, choice lists of content particles, or sequence lists of +content particles:

+ +Element-content Models + + +children(choice | seq) +('?' | '*' | '+')? + + +cp(Name | choice +| seq) ('?' | '*' | '+')? + + +choice'(' S? cp ( S? '|' S? cp )+ S? ')'[E50] +[E52] + + + +seq'(' S? cp ( S? ',' S? cp )* S? ')'[E52] + + + +

where each Name is the type of an element which +may appear as a child. Any content +particle in a choice list may appear in the element +content at the location where the choice list appears in the grammar; +content particles occurring in a sequence list must each appear in the element content in the order given in the list. +The optional character following a name or list governs whether the element +or the content particles in the list may occur one or more (+), +zero or more (*), or zero or one times (?). The +absence of such an operator means that the element or content particle must +appear exactly once. This syntax and meaning are identical to those used in +the productions in this specification.

+

The content of an element matches a content model if and only if it is +possible to trace out a path through the content model, obeying the sequence, +choice, and repetition operators and matching each element in the content +against an element type in the content model. For +compatibility, it is an error if an element in the document can +match more than one occurrence of an element type in the content model. For +more information, see .

+ + +Proper Group/PE Nesting

Parameter-entity replacement text must be properly nested with [E11]parenthesized +groups. That is to say, if either of the opening or closing parentheses in +a choice, seq, or Mixed +construct is contained in the replacement text for a parameter +entity, both must be contained in the same replacement text.

+

[E19]For interoperability, if a parameter-entity reference +appears in a choice, seq, or Mixed construct, its replacement text should contain at +least one non-blank character, and neither the first nor last non-blank character +of the replacement text should be a connector (| or ,).

+
+

Examples of element-content models:

+<!ELEMENT spec (front, body, back?)> +<!ELEMENT div1 (head, (p | list | note)*, div2*)> +<!ELEMENT dictionary-body (%div.mix; | %dict.mix;)*> +
+ +Mixed Content +

An element type +has mixed content when elements of that type may contain character +data, optionally interspersed with child +elements. In this case, the types of the child elements may be constrained, +but not their order or their number of occurrences:

+ +Mixed-content Declaration + + +Mixed'(' S? '#PCDATA' (S? +'|' S? Name)* S? +')*' +| '(' S? '#PCDATA' S? ')' + + + +

where the Names give the types of elements that +may appear as children. [E10]The +keyword #PCDATA derives historically from the term parsed +character data.

+No Duplicate Types

The +same name must not appear more than once in a single mixed-content declaration.

+
+

Examples of mixed content declarations:

+<!ELEMENT p (#PCDATA|a|ul|b|i|em)*> +<!ELEMENT p (#PCDATA | %font; | %phrase; | %special; | %form;)* > +<!ELEMENT b (#PCDATA)> +
+
+ +Attribute-List Declarations +

Attributes are used to associate name-value +pairs with elements. Attribute specifications +may appear only within start-tags and empty-element tags; thus, the productions used to +recognize them appear in the section "Start-Tags, End-Tags, and Empty-Element Tags". Attribute-list declarations +may be used:

+ +

To define the set of attributes pertaining to a given element type.

+
+

To establish type constraints for these attributes.

+

To provide default values for +attributes.

+
+

Attribute-list +declarations specify the name, data type, and default value (if any) +of each attribute associated with a given element type:

+ +Attribute-list Declaration + +AttlistDecl'<!ATTLIST' S Name AttDef* S? '>' + + +AttDefS Name S AttType S DefaultDecl + + +

The Name in the AttlistDecl +rule is the type of an element. At user option, an XML processor may issue +a warning if attributes are declared for an element type not itself declared, +but this is not an error. The Name in the AttDef +rule is the name of the attribute.

+

When more than one AttlistDecl is provided +for a given element type, the contents of all those provided are merged. When +more than one definition is provided for the same attribute of a given element +type, the first declaration is binding and later declarations are ignored. [E9]For interoperability, writers of DTDs may choose +to provide at most one attribute-list declaration for a given element type, +at most one attribute definition for a given attribute name in an attribute-list +declaration, and at least one attribute definition in each attribute-list +declaration. For interoperability, an XML processor may at user option +issue a warning when more than one attribute-list declaration is provided +for a given element type, or more than one attribute definition is provided +for a given attribute, but this is not an error.

+ +Attribute Types +

XML attribute types are of three kinds: a string type, a set of tokenized +types, and enumerated types. The string type may take any literal string as +a value; the tokenized types have varying lexical and semantic constraints[E8]. +The validity constraints noted in the grammar are applied after the attribute +value has been normalized as described in the section "Attribute-Value Normalization".

+ +Attribute Types + + +AttTypeStringType | TokenizedType +| EnumeratedType + + +StringType'CDATA' + + +TokenizedType'ID' + +| 'IDREF' +| 'IDREFS' +| 'ENTITY' +| 'ENTITIES' +| 'NMTOKEN' +| 'NMTOKENS' + + +ID

Values of type ID must match the Name production. A name must not appear more than once +in an XML document as a value of this type; i.e., ID values must uniquely +identify the elements which bear them.

+
+One ID per Element Type

No element +type may have more than one ID attribute specified.

+
+ID Attribute Default

An ID attribute +must have a declared default of #IMPLIED or #REQUIRED.

+
+IDREF

Values of type IDREF must +match the Name production, and values of type IDREFS +must match Names; each Name +must match the value of an ID attribute on some element in the XML document; +i.e. IDREF values must match the value of some ID attribute.

+
+Entity Name

Values of type ENTITY +must match the Name production, values of type ENTITIES +must match Names; each Name +must match the name of an unparsed entity +declared in the DTD.

+
+Name Token

Values of type NMTOKEN +must match the Nmtoken production; values of type NMTOKENS +must match Nmtokens.

+
+ +

Enumerated attributes can take one of a list of values +provided in the declaration. There are two kinds of enumerated types:

+ +Enumerated Attribute Types + +EnumeratedTypeNotationType +| Enumeration + + +NotationType'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' + + + +Enumeration'(' S? Nmtoken +(S? '|' S? Nmtoken)* S? ')' + + +

A NOTATION attribute identifies a notation, +declared in the DTD with associated system and/or public identifiers, to be +used in interpreting the element to which the attribute is attached.

+Notation Attributes

Values of this type +must match one of the notation names +included in the declaration; all notation names in the declaration must be +declared.

+
+[E7]One +Notation Per Element Type

No element type may have more than one NOTATION +attribute specified.

+
+[E68]No +Notation on Empty Element

For compatibility, +an attribute of type NOTATION must not be declared on an element +declared EMPTY.

+
+Enumeration

Values of this type must match +one of the Nmtoken tokens in the declaration.

+
+

For interoperability, the same Nmtoken should not occur more than once in the enumerated +attribute types of a single element type.

+
+ +Attribute Defaults +

An attribute declaration provides information +on whether the attribute's presence is required, and if not, how an XML processor +should react if a declared attribute is absent in a document.

+ +Attribute Defaults + + +DefaultDecl'#REQUIRED' | '#IMPLIED' +| (('#FIXED' S)? AttValue) + + + +

In an attribute declaration, #REQUIRED means that the attribute +must always be provided, #IMPLIED that no default value is provided. If +the declaration is neither #REQUIRED nor #IMPLIED, then +the AttValue value contains the declared default +value; the #FIXED keyword states that the attribute must always have +the default value. If a default value is declared, when an XML processor encounters +an omitted attribute, it is to behave as though the attribute were present +with the declared default value.

+Required Attribute

If the default +declaration is the keyword #REQUIRED, then the attribute must be +specified for all elements of the type in the attribute-list declaration.

+
+Attribute Default Legal

The declared +default value must meet the lexical constraints of the declared attribute +type.

+
+Fixed Attribute Default

If an attribute +has a default value declared with the #FIXED keyword, instances of +that attribute must match the default value.

+
+

Examples of attribute-list declarations:

+<!ATTLIST termdef + id ID #REQUIRED + name CDATA #IMPLIED> +<!ATTLIST list + type (bullets|ordered|glossary) "ordered"> +<!ATTLIST form + method CDATA #FIXED "POST"> +
+ +[E70]Attribute-Value +Normalization +

Before the value of an attribute is passed to the application or checked +for validity, the XML processor must normalize the attribute value by applying +the algorithm below, or by using some other method such that the value passed +to the application is the same as that produced by the algorithm.

+ +

All line breaks must have been normalized on input to #xA as described +in the section "End-of-Line Handling", so the rest of this algorithm operates +on text normalized in this way.

+

Begin with a normalized value consisting of the empty string.

+
+

For each character, entity reference, or character reference in the +unnormalized attribute value, beginning with the first and continuing to the +last, do the following:

+ +

For a character reference, append the referenced character to the +normalized value.

+

For an entity reference, recursively apply step 3 of this algorithm +to the replacement text of the entity.

+

For a white space character (#x20, #xD, #xA, #x9), append a space +character (#x20) to the normalized value.

+

For another character, append the character to the normalized value.

+
+
+
+
+

If the attribute type is not CDATA, then the XML processor must further +process the normalized attribute value by discarding any leading and trailing +space (#x20) characters, and by replacing sequences of space (#x20) characters +by a single space (#x20) character.

+

Note that if the unnormalized attribute value contains a character reference +to a white space character other than space (#x20), the normalized value contains +the referenced character itself (#xD, #xA or #x9). This contrasts with the +case where the unnormalized value contains a white space character (not a +reference), which is replaced with a space character (#x20) in the normalized +value and also contrasts with the case where the unnormalized value contains +an entity reference whose replacement text contains a white space character; +being recursively processed, the white space character is replaced with a +space character (#x20) in the normalized value.

+

All attributes for which no declaration has been read should be treated +by a non-validating [E95]processor +as if declared CDATA.

+

Following are examples of attribute normalization. Given the following +declarations:

+<!ENTITY d "&#xD;"> +<!ENTITY a "&#xA;"> +<!ENTITY da "&#xD;&#xA;"> +

the attribute specifications in the left column below would be normalized +to the character sequences of the middle column if the attribute a +is declared NMTOKENS and to those of the right column if a +is declared CDATA.

+ + + + +
Attribute specification | a is NMTOKENS | a is CDATA
a=" + +xyz" | x y z | #x20 #x20 x y z
a="&d;&d;A&a;&a;B&da;" | A +#x20 B | #x20 #x20 A #x20 #x20 B #x20 #x20
a= +"&#xd;&#xd;A&#xa;&#xa;B&#xd;&#xa;" | #xD +#xD A #xA #xA B #xD #xA | #xD #xD A #xA #xA B #xD #xA
+

Note that the last example is invalid (but well-formed) if a +is declared to be of type NMTOKENS.

+
+
+ +Conditional Sections +

Conditional +sections are portions of the document type +declaration external subset which are included in, or excluded from, +the logical structure of the DTD based on the keyword which governs them.

+ +Conditional Section + + +conditionalSectincludeSect | ignoreSect + + +includeSect'<![' S? 'INCLUDE' S? '[' extSubsetDecl +']]>' [E90] + + + +ignoreSect'<![' S? 'IGNORE' S? '[' ignoreSectContents* +']]>'[E90] + + + +ignoreSectContentsIgnore ('<![' ignoreSectContents ']]>' Ignore)* + + +IgnoreChar* - (Char* +('<![' | ']]>') Char*) + + +[E90]Proper +Conditional Section/PE Nesting

If any of the "<![", +"[", or "]]>" of a conditional section is contained +in the replacement text for a parameter-entity reference, all of them must +be contained in the same replacement text.

+
+

Like the internal and external DTD subsets, a conditional section may contain +one or more complete declarations, comments, processing instructions, or nested +conditional sections, intermingled with white space.

+

If the keyword of the conditional section is INCLUDE, then the +contents of the conditional section are part of the DTD. If the keyword of +the conditional section is IGNORE, then the contents of the conditional +section are not logically part of the DTD. [E90]Note that +for reliable parsing, the contents of even ignored conditional sections must +be read in order to detect nested conditional sections and ensure that the +end of the outermost (ignored) conditional section is properly detected. +If a conditional section with a keyword of INCLUDE occurs within +a larger conditional section with a keyword of IGNORE, both the outer +and the inner conditional sections are ignored. [E90]The contents +of an ignored conditional section are parsed by ignoring all characters after +the "[" following the keyword, except conditional section starts +"<![" and ends "]]>", until the matching conditional +section end is found. Parameter entity references are not recognized in this +process.

+

If the keyword of the conditional section is a parameter-entity reference, +the parameter entity must be replaced by its content before the processor +decides whether to include or ignore the conditional section.

+

An example:

+<!ENTITY % draft 'INCLUDE' > +<!ENTITY % final 'IGNORE' > + +<![%draft;[ +<!ELEMENT book (comments*, title, body, supplements?)> +]]> +<![%final;[ +<!ELEMENT book (title, body, supplements?)> +]]> +
+ +
+ + +Physical Structures +

An XML document may consist of one +or many storage units. [E6]These +are called entities; they all have content and are +all (except for the document entity and +the external DTD subset) identified by +entity name. Each XML document has one entity +called the document entity, which serves +as the starting point for the XML processor +and may contain the whole document.

+

Entities may be either parsed or unparsed. A parsed +entity's contents are referred to as its replacement +text; this text is considered an +integral part of the document.

+

An unparsed entity +is a resource whose contents may or may not be text, +and if text, [E25]may +be other than XML. Each unparsed entity has an associated notation, identified by name. Beyond a requirement +that an XML processor make the identifiers for the entity and notation available +to the application, XML places no constraints on the contents of unparsed +entities.

+

Parsed entities are invoked by name using entity references; unparsed entities +by name, given in the value of ENTITY or ENTITIES attributes.

+

General entities +are entities for use within the document content. In this specification, general +entities are sometimes referred to with the unqualified term entity +when this leads to no ambiguity. [E53]Parameter +entities are parsed entities for use within the DTD. +These two types of entities use different forms of reference and are recognized +in different contexts. Furthermore, they occupy different namespaces; a parameter +entity and a general entity with the same name are two distinct entities.

+ +Character and Entity References +

A character +reference refers to a specific character in the ISO/IEC 10646 character +set, for example one not directly accessible from available input devices.

+ +Character Reference + +CharRef'&#' [0-9]+ ';' +| '&hcro;' [0-9a-fA-F]+ ';' + + +Legal Character

Characters referred +to using character references must match the production for Char.

+
+

If the character reference begins with &#x, +the digits and letters up to the terminating ; provide a hexadecimal +representation of the character's code point in ISO/IEC 10646. If it begins +just with &#, the digits up to the terminating ; +provide a decimal representation of the character's code point.

+

An entity reference +refers to the content of a named entity. References to parsed general entities use +ampersand (&) and semicolon (;) as delimiters. Parameter-entity references +use percent-sign (%) and semicolon (;) as delimiters.

+ +Entity Reference + +ReferenceEntityRef | CharRef + + +EntityRef'&' Name ';' + + +PEReference'%' Name ';' + + + +Entity Declared

In a document +without any DTD, a document with only an internal DTD subset which contains +no parameter entity references, or a document with standalone='yes', [E34]for +an entity reference that does not occur within the external subset or a parameter +entity, the Name given in the entity reference must match that in an entity +declaration that does not occur within the external subset or a +parameter entity, except that well-formed documents need not declare +any of the following entities: &magicents;. [E29]The declaration +of a parameter entity must precede any reference to it. Similarly, the +declaration of a general entity must precede any reference to it which appears +in a default value in an attribute-list declaration.

+

Note that if entities are declared in the external subset or in external +parameter entities, a non-validating processor is not +obligated to read and process their declarations; for such documents, +the rule that an entity must be declared is a well-formedness constraint only +if standalone='yes'.

+
+Entity Declared

In a document with +an external subset or external parameter entities with standalone='no', +the Name given in the entity reference must match that in an entity +declaration. For interoperability, valid documents should declare +the entities &magicents;, in the form specified in . +The declaration of a parameter entity must precede any reference to it. Similarly, +the declaration of a general entity must precede any [E92]attribute-list +declaration containing a default value with a direct or indirect reference +to that general entity.

+
+ +Parsed Entity

An entity reference must +not contain the name of an unparsed entity. +Unparsed entities may be referred to only in attribute +values declared to be of type ENTITY or ENTITIES.

+
+No Recursion

A parsed entity must +not contain a recursive reference to itself, either directly or indirectly.

+
+In DTD

Parameter-entity references may +only appear in the DTD.

+
+

Examples of character and entity references:

+Type <key>less-than</key> (&hcro;3C;) to save options. +This document was prepared on &docdate; and +is classified &security-level;. +

Example of a parameter-entity reference:

+ + + +%ISOLat2;]]> +
+ +Entity Declarations +

Entities are declared +thus:

+ +Entity Declaration + + +EntityDeclGEDecl | PEDecl + + + +GEDecl'<!ENTITY' S Name S EntityDef S? +'>' + + +PEDecl'<!ENTITY' S '%' S Name S PEDef S? '>' + + + +EntityDefEntityValue | (ExternalID NDataDecl?) + + + + +PEDefEntityValue | ExternalID + + +

The Name identifies the entity in an entity +reference or, in the case of an unparsed entity, in the value of +an ENTITY or ENTITIES attribute. If the same entity is declared +more than once, the first declaration encountered is binding; at user option, +an XML processor may issue a warning if entities are declared multiple times.

+ +Internal Entities +

If the +entity definition is an EntityValue, the defined +entity is called an internal entity. There is no separate physical +storage object, and the content of the entity is given in the declaration. +Note that some processing of entity and character references in the literal entity value may be required to produce +the correct replacement text: see .

+

An internal entity is a parsed entity.

+

Example of an internal entity declaration:

+<!ENTITY Pub-Status "This is a pre-release of the + specification."> +
+ +External Entities +

If the entity is not internal, +it is an external entity, declared as follows:

+ +External Entity Declaration + + +ExternalID'SYSTEM' S SystemLiteral +| 'PUBLIC' S PubidLiteral S SystemLiteral + + +NDataDeclS 'NDATA' S Name + + +

If the NDataDecl is present, this is a general unparsed entity; otherwise it is a parsed entity.

+Notation Declared

The Name +must match the declared name of a notation.

+
+

The SystemLiteral is called the entity's system +identifier. It is a [E88]URI +reference[E66] +(as defined in [IETF RFC 2396], updated by [IETF RFC 2732]), [E76]meant +to be dereferenced to obtain input for the XML processor to construct the +entity's replacement text. It is an error for a fragment identifier +(beginning with a # character) to be part of a system identifier. +Unless otherwise provided by information outside the scope of this specification +(e.g. a special XML element type defined by a particular DTD, or a processing +instruction defined by a particular application specification), relative URIs +are relative to the location of the resource within which the entity declaration +occurs. A URI might thus be relative to the document +entity, to the entity containing the external +DTD subset, or to some other external parameter +entity.

+

[E78]URI +references require encoding and escaping of certain characters. The disallowed +characters include all non-ASCII characters, plus the excluded characters +listed in Section 2.4 of [IETF RFC 2396], except for the number sign +(#) and percent sign (%) characters and the square +bracket characters re-allowed in [IETF RFC 2732]. Disallowed characters +must be escaped as follows:

+ +

Each disallowed character is converted to UTF-8 +as one or more bytes.

+

Any octets corresponding to a disallowed character are escaped with +the URI escaping mechanism (that is, converted to %HH, +where HH is the hexadecimal notation of the byte value).

+

The original character is replaced by the resulting character sequence.

+
+
+

In addition to a system +identifier, an external identifier may include a public identifier. +An XML processor attempting to retrieve the entity's content may use the public +identifier to try to generate an alternative [E88]URI reference. +If the processor is unable to do so, it must use the [E88]URI +reference specified in the system literal. Before a match is attempted, +all strings of white space in the public identifier must be normalized to +single space characters (#x20), and leading and trailing white space must +be removed.

+

Examples of external entity declarations:

+<!ENTITY open-hatch + SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml"> +<!ENTITY open-hatch + PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" + "http://www.textuality.com/boilerplate/OpenHatch.xml"> +<!ENTITY hatch-pic + SYSTEM "../grafix/OpenHatch.gif" + NDATA gif > +
+
+ +Parsed Entities + +The Text Declaration +

External parsed entities [E107]should each begin with a text declaration.

+ +Text Declaration + + +TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' + + +

The text declaration must be provided literally, not by reference to a +parsed entity. No text declaration may appear at any position other than the +beginning of an external parsed entity. [E94]The text declaration +in an external parsed entity is not considered part of its replacement +text.

+
+ +Well-Formed Parsed Entities +

The document entity is well-formed if it matches the production labeled document. An external general parsed entity is well-formed +if it matches the production labeled extParsedEnt. [E109]All +external parameter entities are well-formed by definition.

+ +Well-Formed External Parsed Entity + +extParsedEnt ::= TextDecl? content + + +extPE ::= TextDecl? extSubsetDecl +[E109] + + +

An internal general parsed entity is well-formed if its replacement text +matches the production labeled content. All internal +parameter entities are well-formed by definition.

+

A consequence of well-formedness in entities is that the logical and physical +structures in an XML document are properly nested; no start-tag, end-tag, empty-element tag, element, comment, processing instruction, character +reference, or entity reference +can begin in one entity and end in another.

+
+ +Character Encoding in Entities +

Each external parsed entity in an XML document may use a different encoding +for its characters. All XML processors must be able to read entities in [E56]both +the UTF-8 and UTF-16 encodings. [E77]The terms UTF-8 +and UTF-16 in this specification do not apply to character +encodings with any other labels, even if the encodings or labels are very +similar to UTF-8 or UTF-16.

+

Entities encoded in UTF-16 must begin with the Byte Order Mark described +by [E67]Annex +F of [ISO/IEC 10646], Annex H of [ISO/IEC 10646-2000], section +2.4 of [Unicode], and section 2.7 of [Unicode3] +(the ZERO WIDTH NO-BREAK SPACE character, #xFEFF). This is an encoding signature, +not part of either the markup or the character data of the XML document. XML +processors must be able to use this character to differentiate between UTF-8 +and UTF-16 encoded documents.

+

Although an XML processor is required to read only entities in the UTF-8 +and UTF-16 encodings, it is recognized that other encodings are used around +the world, and it may be desired for XML processors to read entities that +use them. [E47]In +the absence of external character encoding information (such as MIME headers), +parsed entities which are stored in an encoding other than UTF-8 or UTF-16 +must begin with a text declaration (see The Text Declaration) containing +an encoding declaration:

+ +Encoding Declaration + +EncodingDecl ::= S 'encoding' Eq +('"' EncName '"' | "'" EncName +"'" ) + + +EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* /* Encoding +name contains only Latin characters */ + + +

In the document entity, the encoding +declaration is part of the XML declaration. +The EncName is the name of the encoding used.

+ +

In an encoding declaration, the values UTF-8, UTF-16, ISO-10646-UCS-2, and ISO-10646-UCS-4 should be used +for the various encodings and transformations of Unicode / ISO/IEC 10646, +the values ISO-8859-1, ISO-8859-2, +... [E106]ISO-8859-n (where n +is the part number) should be used for the parts of ISO 8859, and +the values ISO-2022-JP, Shift_JIS, +and EUC-JP should be used for the various encoded +forms of JIS X-0208-1997. [E57]It +is recommended that character encodings registered (as charsets) +with the Internet Assigned Numbers Authority [E58], +other than those just listed, be referred to using their registered names; +other encodings should use names starting with an x- prefix. +XML processors should match character encoding names in a case-insensitive +way and should either interpret an IANA-registered name as the encoding registered +at IANA for that name or treat it as unknown (processors are, of course, not +required to support all IANA-registered encodings).

+

In the absence of information provided by an external transport protocol +(e.g. HTTP or MIME), it is an error for +an entity including an encoding declaration to be presented to the XML processor +in an encoding other than that named in the declaration, [E5]for +an encoding declaration to occur other than at the beginning of an external +entity, or for an entity which begins with neither a Byte Order Mark +nor an encoding declaration to use an encoding other than UTF-8. Note that +since ASCII is a subset of UTF-8, ordinary ASCII entities do not strictly +need an encoding declaration.

+

[E5]It +is [E36]a +fatal error for a TextDecl to occur other +than at the beginning of an external entity.

+

It is a fatal error when an XML processor +encounters an entity with an encoding that it is unable to process. [E79]It +is a fatal error if an XML entity is determined (via default, encoding declaration, +or higher-level protocol) to be in a certain encoding but contains octet sequences +that are not legal in that encoding. It is also a fatal error if an XML entity +contains no encoding declaration and its content is not legal UTF-8 or UTF-16.

+

Examples of [E23]text +declarations containing encoding declarations:

+<?xml encoding='UTF-8'?> +<?xml encoding='EUC-JP'?> +
+
+ +XML Processor Treatment of Entities and References +

The table below summarizes the contexts in which character references, +entity references, and invocations of unparsed entities might appear and the +required behavior of an XML processor +in each case. The labels in the leftmost column describe the recognition context: + + +

as a reference anywhere after the start-tag +and before the end-tag of an element; corresponds +to the nonterminal content.

+ + + +

as a reference within either the value of an attribute in a start-tag, +or a default value in an attribute declaration; +corresponds to the nonterminal AttValue.

+
+ + +

as a Name, not a reference, appearing either as +the value of an attribute which has been declared as type ENTITY, +or as one of the space-separated tokens in the value of an attribute which +has been declared as type ENTITIES.

+
+ + +

as a reference within a parameter or internal entity's literal +entity value in the entity's declaration; corresponds to the nonterminal EntityValue.

+
+ + +

[E90]as +a reference within either the internal or external subsets of the DTD, but outside of an EntityValue, AttValue, PI, Comment, SystemLiteral, PubidLiteral, +or the contents of an ignored conditional section (see Conditional Sections).

+

.

+
+

+ + + + + + + + + + + +
| Context | Entity Type: Parameter | Entity Type: Internal General | Entity Type: External Parsed General | Entity Type: Unparsed | Character |
| --- | --- | --- | --- | --- | --- |
| Reference in Content | Not recognized | Included | Included if validating | Forbidden | Included |
| Reference in Attribute Value | Not recognized | Included in literal | Forbidden | [E51]Forbidden | Included |
| Occurs as Attribute Value | Not recognized | Forbidden | [E51]Forbidden | Notify | [E51]Not recognized |
| Reference in EntityValue | Included in literal | Bypassed | Bypassed | Forbidden | Included |
| Reference in DTD | Included as PE | Forbidden | Forbidden | Forbidden | Forbidden |
+ +Not Recognized +

Outside the DTD, the % character has no special significance; +thus, what would be parameter entity references in the DTD are not recognized +as markup in content. Similarly, the names of unparsed +entities are not recognized except when they appear in the value of an appropriately +declared attribute.

+
+ +Included +

An entity is included +when its replacement text is retrieved +and processed, in place of the reference itself, as though it were part of +the document at the location the reference was recognized. The replacement +text may contain both character data +and (except for parameter entities) markup, +which must be recognized in the usual way[E65], except that +the replacement text of entities used to escape markup delimiters (the entities amp, lt, gt, apos, quot) +is always treated as data. (The string AT&amp;T; +expands to AT&T; and the remaining ampersand +is not recognized as an entity-reference delimiter.) A character reference +is included when the indicated character is processed in place +of the reference itself.

+
+ +Included If Validating +

When an XML processor recognizes a reference to a parsed entity, in order +to validate the document, the processor +must include its replacement text. If +the entity is external, and the processor is not attempting to validate the +XML document, the processor may, but need +not, include the entity's replacement text. If a non-validating [E95]processor +does not include the replacement text, it must inform the application that +it recognized, but did not read, the entity.

+

This rule is based on the recognition that the automatic inclusion provided +by the SGML and XML entity mechanism, primarily designed to support modularity +in authoring, is not necessarily appropriate for other applications, in particular +document browsing. Browsers, for example, when encountering an external parsed +entity reference, might choose to provide a visual indication of the entity's +presence and retrieve it for display only on demand.

+
+ +Forbidden +

The following are forbidden, and constitute fatal +errors:

+ +

the appearance of a reference to an unparsed +entity.

+

the appearance of any character or general-entity reference in the +DTD except within an EntityValue or AttValue.

+
+

a reference to an external entity in an attribute value.

+
+
+ +Included in Literal +

When an entity reference appears in +an attribute value, or a parameter entity reference appears in a literal entity +value, its replacement text is processed +in place of the reference itself as though it were part of the document at +the location the reference was recognized, except that a single or double +quote character in the replacement text is always treated as a normal data +character and will not terminate the literal. For example, this is well-formed:

+<!-- [E4] --> +<!ENTITY % YN '"Yes"' > +<!ENTITY WhatHeSaid "He said %YN;" > +

while this is not:

+<!ENTITY EndAttr "27'" > +<element attribute='a-&EndAttr;> +
+ +Notify +

When the name of an unparsed entity +appears as a token in the value of an attribute of declared type ENTITY +or ENTITIES, a validating processor must inform the application of +the system and public +(if any) identifiers for both the entity and its associated notation.

+
+ +Bypassed +

When a general entity reference appears in the EntityValue +in an entity declaration, it is bypassed and left as is.

+
+ +Included as PE +

Just as with external parsed entities, parameter entities need only be included if validating. When a parameter-entity +reference is recognized in the DTD and included, its replacement +text is enlarged by the attachment of one leading and one following +space (#x20) character; the intent is to constrain the replacement text of +parameter entities to contain an integral number of grammatical tokens in +the DTD. [E96]This +behavior does not apply to parameter entity references within entity values; +these are described in Included in Literal.

+
+
+ +Construction of Internal Entity Replacement Text +

In discussing the treatment of internal entities, it is useful to distinguish +two forms of the entity's value. The literal +entity value is the quoted string actually present in the entity declaration, +corresponding to the non-terminal EntityValue. The replacement text +is the content of the entity, after replacement of character references and +parameter-entity references.

+

The literal entity value as given in an internal entity declaration (EntityValue) may contain character, parameter-entity, +and general-entity references. Such references must be contained entirely +within the literal entity value. The actual replacement text that is included as described above must contain the replacement +text of any parameter entities referred to, and must contain the character +referred to, in place of any character references in the literal entity value; +however, general-entity references must be left as-is, unexpanded. For example, +given the following declarations:

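+<!ENTITY % pub "&#xc9;ditions Gallimard" > +<!ENTITY rights "All rights reserved" > +<!ENTITY book "La Peste: Albert Camus, +&#xA9; 1947 %pub;. &rights;" > +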

then the replacement text for the entity book +is:

+La Peste: Albert Camus, +© 1947 Éditions Gallimard. &rights; +

The general-entity reference &rights; would +be expanded should the reference &book; appear +in the document's content or an attribute value.

+

These simple rules may have complex interactions; for a detailed discussion +of a difficult example, see Expansion of Entity and Character References.

+
+ +Predefined Entities +

Entity and character references can +both be used to escape the left angle bracket, ampersand, and +other delimiters. A set of general entities (amp, lt, gt, apos, quot) is specified for +this purpose. Numeric character references may also be used; they are expanded +immediately when recognized and must be treated as character data, so the +numeric character references &#60; and &#38; +may be used to escape < and & when they occur +in character data.

+

All XML processors must recognize these entities whether they are declared +or not. For interoperability, valid XML +documents should declare these entities, like any others, before using them. [E80]If +the entities lt or amp are declared, they must be +declared as internal entities whose replacement text is a character reference +to the [E103]respective +character (less-than sign or ampersand) being escaped; the double +escaping is required for these entities so that references to them produce +a well-formed result. If the entities gt, apos, +or quot are declared, they must be declared as internal entities +whose replacement text is the single character being escaped (or a character +reference to that character; the double escaping here is unnecessary but harmless). +For example:

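+<!ENTITY lt "&#38;#60;"> +<!ENTITY gt "&#62;"> +<!ENTITY amp "&#38;#38;"> +<!ENTITY apos "&#39;"> +<!ENTITY quot "&#34;"> +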

Note that the < and & characters +in the declarations of lt and amp +are doubly escaped to meet the requirement that entity replacement be well-formed.

+
+ +Notation Declarations +

Notations identify +by name the format of unparsed entities, +the format of elements which bear a notation attribute, or the application +to which a processing instruction is addressed.

+

Notation declarations +provide a name for the notation, for use in entity and attribute-list declarations +and in attribute specifications, and an external identifier for the notation +which may allow an XML processor or its client application to locate a helper +application capable of processing data in the given notation.

+ +Notation Declarations + +NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' + + +PublicID ::= 'PUBLIC' S PubidLiteral + + +[E22]Unique +Notation Name

Only one notation declaration can declare a given Name.

+
+

XML processors must provide applications with the name and external identifier(s) +of any notation declared and referred to in an attribute value, attribute +definition, or entity declaration. They may additionally resolve the external +identifier into the system identifier, file +name, or other information needed to allow the application to call a processor +for data in the notation described. (It is not an error, however, for XML +documents to declare and refer to notations for which notation-specific applications +are not available on the system where the XML processor or application is +running.)

+
+ +Document Entity +

The document entity +serves as the root of the entity tree and a starting-point for an XML processor. This specification does +not specify how the document entity is to be located by an XML processor; +unlike other entities, the document entity has no name and might well appear +on a processor input stream without any identification at all.

+
+
+ + +Conformance + +Validating and Non-Validating Processors +

Conforming XML processors fall into +two classes: validating and non-validating.

+

Validating and non-validating processors alike must report violations of +this specification's well-formedness constraints in the content of the document entity and any other parsed +entities that they read.

+

Validating +processors must[E21], +at user option, report violations of the constraints expressed by +the declarations in the DTD, and failures +to fulfill the validity constraints given in this specification. +To accomplish this, validating XML processors must read and process the entire +DTD and all external parsed entities referenced in the document.

+

Non-validating processors are required to check only the document +entity, including the entire internal DTD subset, for well-formedness. While they are not required +to check the document for validity, they are required to process +all the declarations they read in the internal DTD subset and in any parameter +entity that they read, up to the first reference to a parameter entity that +they do not read; that is to say, they must use the information +in those declarations to normalize +attribute values, include the replacement +text of internal entities, and supply default +attribute values. [E33]Except when standalone="yes", they +must not process entity +declarations or attribute-list declarations +encountered after a reference to a parameter entity that is not read, since +the entity may have contained overriding declarations.

+
+ +Using XML Processors +

The behavior of a validating XML processor is highly predictable; it must +read every piece of a document and report all well-formedness and validity +violations. Less is required of a non-validating processor; it need not read +any part of the document other than the document entity. This has two effects +that may be important to users of XML processors:

+ +

Certain well-formedness errors, specifically those that require reading +external entities, may not be detected by a non-validating processor. Examples +include the constraints entitled Entity Declared, Parsed Entity, and No +Recursion, as well as some of the cases described as forbidden in Forbidden.

+

The information passed from the processor to the application may +vary, depending on whether the processor reads parameter and external entities. +For example, a non-validating processor may not normalize +attribute values, include the replacement +text of internal entities, or supply default +attribute values, where doing so depends on having read declarations +in external or parameter entities.

+
+

For maximum reliability in interoperating between different XML processors, +applications which use non-validating processors should not rely on any behaviors +not required of such processors. Applications which require facilities such +as the use of default attributes or internal entities which are declared in +external entities should use validating XML processors.

+
+
+ +Notation +

The formal grammar of XML is given in this specification using a simple +Extended Backus-Naur Form (EBNF) notation. Each rule in the grammar defines +one symbol, in the form

+symbol ::= expression +

Symbols are written with an initial capital letter if they are [E42]the +start symbol of a regular language, otherwise with an initial lower +case letter. Literal strings are quoted.

+

Within the expression on the right-hand side of a rule, the following expressions +are used to match strings of one or more characters: + + +

#xN: where N is a hexadecimal integer, the expression matches the +character in ISO/IEC 10646 whose canonical (UCS-4) code value, when interpreted +as an unsigned binary number, has the value indicated. The number of leading +zeros in the #xN form is insignificant; the number of leading +zeros in the corresponding code value is governed by the character encoding +in use and is not significant for XML.

+ + + +

[a-zA-Z], [#xN-#xN]: matches any [E93]Char with a value in the range(s) indicated (inclusive).

+
+ + +

[abc], [#xN#xN#xN]: matches any Char with a value among the characters +enumerated. Enumerations and ranges can be mixed in one set of brackets.

+
+ + +

[^a-z], [^#xN-#xN]: matches any [E93]Char with a value outside the range +indicated.

+
+ + +

[^abc], [^#xN#xN#xN]: matches any [E93]Char with a value not among the characters given. [E3]Enumerations +and ranges of forbidden values can be mixed in one set of brackets.

+
+ + +

"string": matches a literal string matching that +given inside the double quotes.

+
+ + +

'string': matches a literal string matching that +given inside the single quotes.

+
+ These symbols may be combined to match more complex patterns as follows, +where A and B represent simple expressions: + + +

(expression): expression is treated as a unit and may be combined as described +in this list.

+
+ + +

A?: matches A or nothing; optional A.

+
+ + +

A B: matches A followed by B. [E20]This +operator has higher precedence than alternation; thus A B | C D +is identical to (A B) | (C D).

+
+ + +

A | B: matches A or B but not both.

+
+ + +

A - B: matches any string that matches A but does not match B.

+
+ + +

A+: matches one or more occurrences of A. [E20]Concatenation +has higher precedence than alternation; thus A+ | B+ is identical +to (A+) | (B+).

+
+ + +

A*: matches zero or more occurrences of A. [E20]Concatenation +has higher precedence than alternation; thus A* | B* is identical +to (A*) | (B*).

+
+
Other notations used in the productions are: + + +

/* ... */: comment.

+
+ + +

[ wfc: ... ]: well-formedness constraint; this identifies by name a constraint on well-formed documents associated with a production.

+
+ + +

[ vc: ... ]: validity constraint; this identifies by name a constraint on valid +documents associated with a production.

+
+

+
+ + + + +References + +Normative References + +[E58](Internet +Assigned Numbers Authority) Official Names for Character Sets, +ed. Keld Simonsen et al. See ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets. +IETF +(Internet Engineering Task Force). RFC 1766: Tags for the Identification +of Languages, ed. H. Alvestrand. 1995. +[E38] +(International Organization for Standardization). ISO 639:1988 (E). +Code for the representation of names of languages. [Geneva]: International +Organization for Standardization, 1988. +[E38] +(International Organization for Standardization). ISO 3166-1:1997 +(E). Codes for the representation of names of countries and their subdivisions — +Part 1: Country codes [Geneva]: International Organization for +Standardization, 1997. +ISO (International Organization for +Standardization). ISO/IEC 10646-1993 (E). Information technology — +Universal Multiple-Octet Coded Character Set (UCS) — Part 1: Architecture +and Basic Multilingual Plane. [Geneva]: International Organization +for Standardization, 1993 (plus amendments AM 1 through AM 7). +[E67] ISO (International +Organization for Standardization). ISO/IEC 10646-1:2000. Information +technology — Universal Multiple-Octet Coded Character Set (UCS) — +Part 1: Architecture and Basic Multilingual Plane. [Geneva]: International +Organization for Standardization, 2000. +The Unicode Consortium. The Unicode +Standard, Version 2.0. Reading, Mass.: Addison-Wesley Developers Press, +1996. +[E67] +The Unicode Consortium. The Unicode Standard, Version 3.0. Reading, +Mass.: Addison-Wesley Developers Press, 2000. ISBN 0-201-61633-5. + + + +Other References + +Aho, Alfred V., Ravi Sethi, and Jeffrey D. +Ullman. Compilers: Principles, Techniques, and Tools. +Reading: Addison-Wesley, 1986, rpt. corr. 1988. + Berners-Lee, T., R. Fielding, +and L. Masinter. Uniform Resource Identifiers (URI): Generic Syntax +and Semantics. 1997. (Work in progress; see updates to RFC1738.) +[E2]Brüggemann-Klein, +Anne. 
Formal Models in Document Processing. Habilitationsschrift. Faculty +of Mathematics at the University of Freiburg, 1993. (See ftp://ftp.informatik.uni-freiburg.de/documents/papers/brueggem/habil.ps.) +[E2]Brüggemann-Klein, +Anne, and Derick Wood. Deterministic Regular Languages. +Universität Freiburg, Institut für Informatik, Bericht 38, Oktober 1991. Extended +abstract in A. Finkel, M. Jantzen, Hrsg., STACS 1992, S. 173-184. Springer-Verlag, +Berlin 1992. Lecture Notes in Computer Science 577. Full version titled One-Unambiguous +Regular Languages in Information and Computation 140 (2): 229-253, +February 1998. +James Clark. Comparison of SGML and XML. See http://www.w3.org/TR/NOTE-sgml-xml-971215. +[E58](Internet +Assigned Numbers Authority) Registry of Language Tags, +ed. Keld Simonsen et al. +IETF +(Internet Engineering Task Force). RFC 1738: Uniform Resource Locators +(URL), ed. T. Berners-Lee, L. Masinter, M. McCahill. 1994. +IETF +(Internet Engineering Task Force). RFC 1808: Relative Uniform Resource +Locators, ed. R. Fielding. 1995. +IETF +(Internet Engineering Task Force). RFC 2141: URN Syntax, ed. +R. Moats. 1997. +[E78]IETF +(Internet Engineering Task Force). RFC 2279: UTF-8, a transformation +format of ISO 10646, ed. F. Yergeau, 1998. +[E48]IETF +(Internet Engineering Task Force). RFC 2376: XML Media Types. +ed. E. Whitehead, M. Murata. 1998. +[E66]IETF +(Internet Engineering Task Force). RFC 2396: Uniform Resource Identifiers +(URI): Generic Syntax. T. Berners-Lee, R. Fielding, L. Masinter. +1998. +[E66]IETF +(Internet Engineering Task Force). RFC 2732: Format for Literal +IPv6 Addresses in URL's. R. Hinden, B. Carpenter, L. Masinter. +1999. +[E77] +IETF (Internet Engineering Task Force). RFC 2781: UTF-16, an encoding +of ISO 10646, ed. P. Hoffman, F. Yergeau. 2000. +[E38] +(International Organization for Standardization). ISO 639:1988 (E). +Code for the representation of names of languages. 
[Geneva]: International +Organization for Standardization, 1988. +[E38] +(International Organization for Standardization). ISO 3166-1:1997 +(E). Codes for the representation of names of countries and their subdivisions — +Part 1: Country codes [Geneva]: International Organization for +Standardization, 1997. +ISO (International Organization for Standardization). ISO +8879:1986(E). Information processing — Text and Office Systems — +Standard Generalized Markup Language (SGML). First edition — +1986-10-15. [Geneva]: International Organization for Standardization, 1986. +ISO (International Organization for +Standardization). ISO/IEC 10744-1992 (E). Information technology — +Hypermedia/Time-based Structuring Language (HyTime). [Geneva]: +International Organization for Standardization, 1992. Extended Facilities +Annexe. [Geneva]: International Organization for Standardization, 1996. +[E43]ISO +(International Organization for Standardization). ISO 8879:1986 +TC2. Information technology — Document Description and Processing Languages. +[Geneva]: International Organization for Standardization, 1998. +[E98]Tim Bray, +Dave Hollander, and Andrew Layman, editors. Namespaces in XML. +Textuality, Hewlett-Packard, and Microsoft. World Wide Web Consortium, 1999. + + + +Character Classes +

Following the characteristics defined in the Unicode standard, characters +are classed as base characters (among others, these contain the alphabetic +characters of the Latin alphabet[E84], without +diacritics), ideographic characters, and combining characters (among +others, this class contains most diacritics)[E30]; these classes +combine to form the class of letters. Digits and extenders are also +distinguished.

+ +Characters + + +LetterBaseChar | Ideographic + + +BaseChar[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] +| [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] +| [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] +| [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] +| #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] +| [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC +| #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] +| [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] +| [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] +| [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] +| [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] +| [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] +| #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D +| [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] +| [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] +| [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] +| [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] +| [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] +| [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] +| [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD +| #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] +| [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D +| [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] +| [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] +| [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] +| [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] +| [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] +| [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE +| [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | 
[#x0D12-#x0D28] +| [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 +| [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 +| [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] +| [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 +| [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] +| #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] +| [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] +| [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] +| #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 +| [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 +| #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] +| #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] +| [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB +| #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] +| [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] +| [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] +| [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] +| [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] +| [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] +| #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] +| [#x3105-#x312C] | [#xAC00-#xD7A3] + + +Ideographic[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] + + +CombiningChar[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] +| [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF +| [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 +| [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] +| [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] +| #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] +| #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] +| [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 +| #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] +| [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC +| 
[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] +| #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] +| [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] +| [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] +| [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] +| [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] +| [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] +| #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] +| #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] +| [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E +| #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] +| #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 +| [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 +| #x309A + + +Digit[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] +| [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] +| [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] +| [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29] + + +Extender#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 +| #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] +| [#x30FC-#x30FE] + + +

The character classes defined here can be derived from the Unicode [E67]2.0 +character database as follows:

+ +

Name start characters must have one of the categories Ll, Lu, Lo, +Lt, Nl.

+

Name characters other than Name-start characters must have one of +the categories Mc, Me, Mn, Lm, or Nd.

+

Characters in the compatibility area (i.e. with character code greater +than #xF900 and less than #xFFFE) are not allowed in XML names.

+

Characters which have a font or compatibility decomposition (i.e. +those with a compatibility formatting tag in field 5 of the +database -- marked by field 5 beginning with a <) are not +allowed.

+

The following characters are treated as name-start characters rather +than name characters, because the property file classifies them as Alphabetic: +[#x02BB-#x02C1], #x0559, #x06E5, #x06E6.

+

Characters #x20DD-#x20E0 are excluded (in accordance with Unicode [E67]2.0, +section 5.14).

+

Character #x00B7 is classified as an extender, because the property +list so identifies it.

+

Character #x0387 is added as a name character, because #x00B7 is +its canonical equivalent.

+

Characters ':' and '_' are allowed as name-start characters.

+
+

Characters '-' and '.' are allowed as name characters.

+
+
+ +XML and SGML +

[E43]XML +is designed to be a subset of SGML, in that every XML document should also +be a conforming SGML document. For a detailed comparison of the additional +restrictions that XML places on documents beyond those of SGML, see [Clark].

+
+ +Expansion of Entity and Character References +

This appendix contains some examples illustrating the sequence of entity- +and character-reference recognition and expansion, as specified in section 4.4 (XML Processor Treatment of Entities and References).

+

If the DTD contains the declaration

+An ampersand (&#38;) may be escaped +numerically (&#38;#38;) or with a general entity +(&amp;).

" >]]>
+

then the XML processor will recognize the character references when it +parses the entity declaration, and resolve them before storing the following +string as the value of the entity example:

+An ampersand (&) may be escaped +numerically (&#38;) or with a general entity +(&amp;).

]]>
+

A reference in the document to &example; +will cause the text to be reparsed, at which time the start- and end-tags +of the p element will be recognized and the three references will +be recognized and expanded, resulting in a p element with the following +content (all data, no delimiters or markup):

+ +

A more complex example will illustrate the rules and their effects fully. +In the following example, the line numbers are solely for reference.

+ +2 +4 +5 ' > +6 %xx; +7 ]> +8 This sample shows a &tricky; method.]]> +

This produces the following:

+ +

in line 4, the reference to character 37 is expanded immediately, +and the parameter entity xx is stored in the symbol +table with the value %zz;. Since the replacement +text is not rescanned, the reference to parameter entity zz +is not recognized. (And it would be an error if it were, since zz +is not yet declared.)

+

in line 5, the character reference &#60; +is expanded immediately and the parameter entity zz +is stored with the replacement text <!ENTITY tricky "error-prone" +>, which is a well-formed entity declaration.

+

in line 6, the reference to xx is recognized, +and the replacement text of xx (namely %zz;) +is parsed. The reference to zz is recognized in +its turn, and its replacement text (<!ENTITY tricky "error-prone" +>) is parsed. The general entity tricky +has now been declared, with the replacement text error-prone.

+
+

in line 8, the reference to the general entity tricky +is recognized, and it is expanded, so the full content of the test +element is the self-describing (and ungrammatical) string This sample +shows a error-prone method.

+
+
+ +Deterministic Content Models +

[E102]As +noted in section 3.2.1 (Element Content), it is required that content +models in element type declarations be deterministic. This requirement is for compatibility with SGML (which calls deterministic +content models unambiguous); XML processors built +using SGML systems may flag non-deterministic content models as errors.

+

For example, the content model ((b, c) | (b, d)) is non-deterministic, +because given an initial b the [E95]XML processor +cannot know which b in the model is being matched without looking +ahead to see which element follows the b. In this case, the two references +to b can be collapsed into a single reference, making the model read (b, +(c | d)). An initial b now clearly matches only a single name +in the content model. The [E95]processor doesn't need to look ahead to see what follows; either c or d +would be accepted.

+

More formally: a finite state automaton may be constructed from the content +model using the standard algorithms, e.g. algorithm 3.5 in section 3.9 of +Aho, Sethi, and Ullman [Aho/Ullman]. In many such algorithms, a follow +set is constructed for each position in the regular expression (i.e., each +leaf node in the syntax tree for the regular expression); if any position +has a follow set in which more than one following position is labeled with +the same element type name, then the content model is in error and may be +reported as an error.

+

Algorithms exist which allow many but not all non-deterministic content +models to be reduced automatically to equivalent deterministic models; see +Brüggemann-Klein 1991 [Brüggemann-Klein].

+
+ +[E105][E48]Autodetection +of Character Encodings +

The XML encoding declaration functions as an internal label on each entity, +indicating which character encoding is in use. Before an XML processor can +read the internal label, however, it apparently has to know what character +encoding is in use—which is what the internal label is trying to indicate. +In the general case, this is a hopeless situation. It is not entirely hopeless +in XML, however, because XML limits the general case in two ways: each implementation +is assumed to support only a finite set of character encodings, and the XML +encoding declaration is restricted in position and content in order to make +it feasible to autodetect the character encoding in use in each entity in +normal cases. Also, in many cases other sources of information are available +in addition to the XML data stream itself. Two cases may be distinguished, +depending on whether the XML entity is presented to the processor without, +or with, any accompanying (external) information. We consider the first case +first.

+ +Detection Without External Encoding Information +

Because each XML entity not accompanied by external +encoding information and not in UTF-8 or UTF-16 encoding must +begin with an XML encoding declaration, in which the first characters must +be '<?xml', any conforming processor can detect, after two +to four octets of input, which of the following cases apply. In reading this +list, it may help to know that in UCS-4, '<' is #x0000003C +and '?' is #x0000003F, and the Byte Order Mark +required of UTF-16 data streams is #xFEFF. The notation ## is used to denote any byte value except that two consecutive ##s cannot be both 00.

+

With a Byte Order Mark:

+ + + + + +
00 00 FE +FFUCS-4, big-endian machine (1234 order)
FF +FE 00 00UCS-4, little-endian machine (4321 order)
00 00 FF FEUCS-4, unusual octet order (2143)
FE FF 00 00UCS-4, unusual octet order (3412)
FE FF ## ##UTF-16, big-endian
FF FE ## ##UTF-16, little-endian
EF BB BFUTF-8
+

Without a Byte Order Mark:

+ + + + + + + + + +
00 00 00 3CUCS-4 or other encoding with a 32-bit code unit and ASCII +characters encoded as ASCII values, in respectively big-endian (1234), little-endian +(4321) and two unusual byte orders (2143 and 3412). The encoding declaration +must be read to determine which of UCS-4 or other supported 32-bit encodings +applies.
3C 00 00 00
00 00 3C 00
00 3C 00 00
00 3C 00 3FUTF-16BE or big-endian ISO-10646-UCS-2 +or other encoding with a 16-bit code unit in big-endian order and ASCII characters +encoded as ASCII values (the encoding declaration must be read to determine +which)
3C 00 3F 00UTF-16LE or little-endian +ISO-10646-UCS-2 or other encoding with a 16-bit code unit in little-endian +order and ASCII characters encoded as ASCII values (the encoding declaration +must be read to determine which)
3C 3F 78 6DUTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other +7-bit, 8-bit, or mixed-width encoding which ensures that the characters of +ASCII have their normal positions, width, and values; the actual encoding +declaration must be read to detect which of these applies, but since all of +these encodings use the same bit patterns for the relevant ASCII characters, +the encoding declaration itself may be read reliably
4C +6F A7 94EBCDIC (in some flavor; the full encoding declaration +must be read to tell which code page is in use)
OtherUTF-8 without an encoding declaration, or else the data stream is mislabeled +(lacking a required encoding declaration), corrupt, fragmentary, or enclosed +in a wrapper of some kind
+ +

In cases above which do not require reading the encoding declaration to +determine the encoding, section 4.3.3 still requires that the encoding declaration, +if present, be read and that the encoding name be checked to match the actual +encoding of the entity. Also, it is possible that new character encodings +will be invented that will make it necessary to use the encoding declaration +to determine the encoding, in cases where this is not required at present.

+
+

This level of autodetection is enough to read the XML encoding declaration +and parse the character-encoding identifier, which is still necessary to distinguish +the individual members of each family of encodings (e.g. to tell UTF-8 from +8859, and the parts of 8859 from each other, or to distinguish the specific +EBCDIC code page in use, and so on).

+

Because the contents of the encoding declaration are restricted to characters from the ASCII repertoire (however encoded), +a processor can reliably read the entire encoding declaration as soon as it +has detected which family of encodings is in use. Since in practice, all widely +used character encodings fall into one of the categories above, the XML encoding +declaration allows reasonably reliable in-band labeling of character encodings, +even when external sources of information at the operating-system or transport-protocol +level are unreliable. Note that since external parsed entities +in UTF-16 may begin with any character, this autodetection does not always +work. Also, character encodings such as UTF-7 +that make overloaded usage of ASCII-valued bytes may fail to be reliably detected.

+

Once the processor has detected the character encoding in use, it can act +appropriately, whether by invoking a separate input routine for each case, +or by calling the proper conversion function on each character of input.

+

Like any self-labeling system, the XML encoding declaration will not work +if any software changes the entity's character set or encoding without updating +the encoding declaration. Implementors of character-encoding routines should +be careful to ensure the accuracy of the internal and external information +used to label the entity.

+
+ +Priorities in the Presence of External Encoding Information +

The second possible case occurs when the XML entity is accompanied by encoding +information, as in some file systems and some network protocols. When multiple +sources of information are available, their relative priority and the preferred +method of handling conflict should be specified as part of the higher-level +protocol used to deliver XML. In particular, please refer +to [IETF RFC 2376] or its successor, which defines the text/xml +and application/xml MIME types and provides some useful guidance. +In the interests of interoperability, however, the following rule is recommended.

+ +

If an XML entity is in a file, the Byte-Order Mark and encoding declaration PI are used (if present) to determine the character encoding.[E74] +All other heuristics and sources of information are solely for error recovery.

+
+
+ +

If an XML entity is delivered with a MIME type of text/xml, then +the charset parameter on the MIME type determines the character +encoding method; all other heuristics and sources of information are solely +for error recovery.

+

If an XML entity is delivered with a MIME type of application/xml, +then the Byte-Order Mark and encoding-declaration PI are used (if present) +to determine the character encoding. All other heuristics and sources of information +are solely for error recovery.

+
+

These rules apply only in the absence of protocol-level documentation; +in particular, when the MIME types text/xml and application/xml are defined, +the recommendations of the relevant RFC will supersede these rules.

+
+
+ +W3C XML Working Group +

This specification was prepared and approved for publication by the W3C +XML Working Group (WG). WG approval of this specification does not necessarily +imply that all WG members voted for its approval. The current and former members +of the XML WG are:

+ +Jon BosakSunChair + +James ClarkTechnical Lead +Tim BrayTextuality and Netscape +XML Co-editor +Jean PaoliMicrosoftXML +Co-editor +C. M. Sperberg-McQueenU. of Ill. +XML Co-editor +Dan ConnollyW3CW3C Liaison + +Paula AngersteinTexcel +Steve DeRoseINSO +Dave HollanderHP +Eliot KimberISOGEN +Eve MalerArborText +Tom MaglieryNCSA +Murray MaloneySoftQuad, Grif +SA, Muzmo and Veo Systems +MURATA Makoto (FAMILY Given)Fuji +Xerox Information Systems +Joel NavaAdobe +Conleth O'ConnellVignette + +Peter SharpeSoftQuad +John TigueDataChannel + +
+ +W3C XML Core Group +

The second edition of this specification was prepared by the W3C XML Core +Working Group (WG). The members of the WG at the time of publication of this +edition were:

+ +Paula AngersteinVignette +Daniel AustinAsk Jeeves +Tim Boland +Allen BrownMicrosoft +Dan ConnollyW3CStaff +Contact +John CowanReuters Limited + +John EvdemonXMLSolutions Corporation + +Paul GrossoArbortextCo-Chair + +Arnaud Le HorsIBMCo-Chair + +Eve MalerSun Microsystems +Second Edition Editor +Jonathan MarshMicrosoft +MURATA Makoto (FAMILY Given)IBM + +Mark NeedlemanData Research Associates + +David OrchardJamcracker +Lew ShannonNCR +Richard TobinUniversity of Edinburgh + +Daniel VeillardW3C +Dan VintLexica +Norman WalshSun Microsystems + +François YergeauAlis Technologies +Errata List Editor +Kongyi ZhouOracle + +
+ +Production Notes +

This Second Edition was encoded in the XMLspec +DTD (which has documentation +available). The HTML versions were produced with a combination of the xmlspec.xsl, diffspec.xsl, +and REC-xml-2e.xsl +XSLT stylesheets. The PDF version was produced with the html2ps +facility and a distiller program.

+
+
diff --git a/tests/xmlspec/W3C-REC.css b/tests/xmlspec/W3C-REC.css new file mode 100644 index 0000000..a52b50c --- /dev/null +++ b/tests/xmlspec/W3C-REC.css @@ -0,0 +1,94 @@ +/* This is an SSI script. Policy: + (1) Use CVS + (2) send e-mail to w3t-comm@w3.org if you edit this +*/ +/* Style for a "Recommendation" */ + +/* + This is an SSI script. Policy: + (1) Use CVS + (2) send e-mail to w3t-comm@w3.org if you edit this + + Acknowledgments: + + - 'background-color' doesn't work on Mac IE 3, but 'background' + does (Susan Lesch ) + + - 'smaller' leads to illegible text in both Mac IE and Mac NS, + 'small' is better: works in Mac NS, but is always 12pt in Mac IE + 3 (Susan Lesch ) + + $Id$ +*/ + +body { + margin: 2em 1em 2em 70px; + font-family: sans-serif; + color: black; + background: white; + background-position: top left; + background-attachment: fixed; + background-repeat: no-repeat; +} + +th, td { /* ns 4 */ + font-family: sans-serif; +} + +h1, h2, h3, h4, h5, h6 { text-align: left } +h1, h2, h3 { color: #005A9C } +h1 { font: 170% sans-serif } +h2 { font: 140% sans-serif } +h3 { font: 120% sans-serif } +h4 { font: bold 100% sans-serif } +h5 { font: italic 100% sans-serif } +h6 { font: small-caps 100% sans-serif } + +.hide { display: none } + +div.head { margin-bottom: 1em } +div.head h1 { margin-top: 2em; clear: both } +div.head table { margin-left: 2em; margin-top: 2em } +div.head img { color: white; border: none } /* remove border from top image */ + +p.copyright { font-size: small } +p.copyright small { font-size: small } + +@media screen { /* hide from IE3 */ +a:hover { background: #ffa } +} + +pre { margin-left: 2em } +/* +p { + margin-top: 0.6em; + margin-bottom: 0.6em; +} +*/ +dt, dd { margin-top: 0; margin-bottom: 0 } /* opera 3.50 */ +dt { font-weight: bold } + +pre, code { font-family: monospace } /* navigator 4 requires this */ + +ul.toc { + list-style: disc; /* Mac NS has problem with 'none' */ + list-style: none; +} + +@media aural { + h1, h2, h3 { 
stress: 20; richness: 90 } + .hide { speak: none } + p.copyright { volume: x-soft; speech-rate: x-fast } + dt { pause-before: 20% } + pre { speak-punctuation: code } +} + +/* Things (icons) that should only show up in projection mode */ +.projection { display: none } +@media projection { .projection { display: inline } } + + +body { + background-image: url(logo-REC); +} + diff --git a/tests/xmlspec/logo-REC b/tests/xmlspec/logo-REC new file mode 100644 index 0000000..bddad3e Binary files /dev/null and b/tests/xmlspec/logo-REC differ diff --git a/tests/xmlspec/xmlspec-v21.dtd b/tests/xmlspec/xmlspec-v21.dtd new file mode 100644 index 0000000..b70fd69 --- /dev/null +++ b/tests/xmlspec/xmlspec-v21.dtd @@ -0,0 +1,1734 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/xmlspec/xmlspec.xsl b/tests/xmlspec/xmlspec.xsl new file mode 100644 index 0000000..5fe48da --- /dev/null +++ b/tests/xmlspec/xmlspec.xsl @@ -0,0 +1,2091 @@ + + + + + + + + + + + + + + + + + + + + 1 + 0 + + + 0 + 1 + + + + + + + + + + + + + + + + +
+ +

+ Abstract +

+ +
+
+ + + + + , + + + + + + + + + , + + + + + + ? + + + + + + + + + + + + + + + + + + " + + " + + + + + +
+ Editor + + s + + : +
+ +
+ + + + + +
+ + + - Second Edition + +
+
+ + + + + +
+ +
+
+ + + + + + + +
+ + + + + + + + + + + +
+
+ + + (See + + + + .) + +
+
+ + + + + + + + + + + + [ + + + + + + + + + ] + + + + + + +
+ +
+
+ + + + + + + + + + + diff- + + + +
+            
+          
+ + + +
+ + + + + +
+ +

+ Table of Contents +

+

+ +

+ + +

+ Appendi + + + ces + + + x + + +

+

+ +

+
+ +

+ + End Notes + +

+
+
+
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + diff- + + + + + /* + + */ + + + + + + + + + + diff- + + + + + /* + + */ + + + + + + + + + + + /* + + */ + + + + + + + + + + + + + + + + + + diff- + + + + + + + + + + [Constraint: + + ] + + + + + + + + + + diff- + + + + + + + + + + [Constraint: + + ] + + + + + + + + + + +
+ +
+
+ + + + + + + + + + +
+ +
+
+ + + + + + + + + + + +
+ +
+
+ + +
+ +
+
+ + +
+ +
+
+ + +
+ +
+
+ + +
+ +
+
+ + + + + + + Editorial note + + : + + + + + + + + + + +
+ + Editorial note + + : + + + + + + + + +   + +
+ +
+
+
+ + + + + + + + + + + + + + + +
+ + + diff- + + + +
+            
+          
+
+
+ + + + + + + + + + + + + + < + + > + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + + + + + + + + + + + + + + + + </p> + +
+ +
+ + <p> + +
+ + + + + + + + + + + + + + + + + + + + +

+ + + + Constraint: +

+
+ + + +

+ + + + + + + + + + + + + + +

+ + + + +

+ + + + + + + + + + + + + + +

+
+ + + +

+ + + + + + + + + + + + + + +

+
+ + + +
+ + + + + + + + + + + + + + +
+
+ + + +
+ + + + + + + + + + + + + + +
+
+ + + +
+ Example: + +
+
+ + + +

+ + + + + + + + + + + + + + + (Non-Normative) +

+
+ + +

+ +

+
+ + + +
+ +
+
+ + +

+ + + + Validity constraint: +

+
+ + +

+ + + + Well-formedness constraint: +

+ + + + + +
+

+ + W3C + +

+ +

+ + + + + +

+ + +

+ +

+
+ +

+ + + + + + + + + +

+
+ + + + +
+ +
+
+ + + + + + + + +
+ +
+
+ + + + + + + + + + + + + +
  • + +
  • +
    + + + + + + + + + + +
    + + +
    + + + + + + + + + +
    Latest version:
    +
    + +
    +
    + + + + + + + + + diff- + + + + +
    + + + +     + + + + + diff- + + + + + + + + + diff- + + + +    ::=    + + + + + + + + + + + + + + + + +
  • + +
  • +
    + + + + + + + + + + + + + + + + + + + +
    +

    + Note: +

    + +
    +
    + + + + +
    +

    + NOTICE: +

    + +
    +
    + + + + + + + + + + + + + + + + +
      + +
    +
    + + + +
      + +
    +
    + + + +

    + + + + + + + + + + + +

    +
    + + + + + + + + + + + + + + + + +
    Previous versions:
    +
    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    + + + + ( + + ) +

    +
    + + + + + + + +
    This version:
    +
    + +
    +
    + + + + + + + + " + + " + + + + + + + + + + +

    + Resolution: +

    + +
    + + + + + + + + + + + + + + + + diff- + + + + + + + + + + + + + + diff- + + + + + + + + + + + + + + ( + + ) + + + + + + + + +
    +
    + + + + + + + +
    + +
    + + + + +
    +

    + +

    +
    +
    + + + +

    + Source + : + +

    +
    + + + + + + + + + + + + + + + + <xsl:apply-templates select="header/title"/> + <xsl:if test="header/version"> + <xsl:text> </xsl:text> + <xsl:apply-templates select="header/version"/> + </xsl:if> + <xsl:if test="$additional.title != ''"> + <xsl:text> -- </xsl:text> + <xsl:value-of select="$additional.title"/> + </xsl:if> + + + + + + +
    +
    + +

    + + End Notes + +

    +
    + +
    +
    +
    + + +
    + + + + + + + + + + + + + + + + + + + [VC: ] + + + + [PROD: + + ] + + + + + [ + + ] + + + + + Unsupported specref to + + [ + + ] + (Contact stylesheet maintainer). + + ??? + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [ + + Definition + + : + + ] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      + +
    +
    + + + + + + + + + + + + + + + + + + + diff- + + + + + + + + + + [VC: + + ] + + + + + + + + + + diff- + + + + + + + + + + [VC: + + ] + + + + + + + + + +
    + +
    +
    + + + + + + + + + + + + + + + + + + diff- + + + + + + + + + + [WFC: + + ] + + + + + + + + + + diff- + + + + + + + + + + [WFC: + + ] + + + + + + + + + +
    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + + + + + + + + + + + +

    + +

    +
    + + + +
      + +
    +
    + +
  • + +
  • +
    + +
      + +
    +
    + +

    + +

    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + +
    +
    + +
    +
    + + + + [ + + ] + + + + [ + + + + ] + + + + + + + + + + + + + + + diff- + + + + + + + + + + + + diff- + + + +    ::=    + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    + + +      + + + + + + + + + +
    + +
    + + +          + + + + + + + + + +
    + +
    + + +              + + + + + + + + + +
    + +
    + + +                  + + + + + + + + + +
    +
    + + + + + + + + + + + + (Non-Normative) +
    + +
    + + + + + + W3C- + + WD + REC + REC + REC + NOTE + + .css + + + + + + + + # + + + + + + + + + + + +