diff options
-rw-r--r-- | ChangeLog | 16 | ||||
-rw-r--r-- | SAX.c | 58 | ||||
-rw-r--r-- | include/libxml/parserInternals.h | 6 | ||||
-rw-r--r-- | include/libxml/valid.h | 5 | ||||
-rw-r--r-- | parser.c | 47 | ||||
-rw-r--r-- | valid.c | 131 |
6 files changed, 222 insertions, 41 deletions
@@ -1,3 +1,19 @@ +Tue Feb 19 22:01:35 CET 2002 Daniel Veillard <daniel@veillard.com> + + * include/libxml/parserInternals.h parser.c: had to change + 2 internal parsing APIs when processing document content + to check the start and end of element content are defined + in the same entity + * valid.c include/libxml/valid.h: attribute normalization can + generate a validity error, added xmlValidCtxtNormalizeAttributeValue() + with the context to report it. + * SAX.c: fixed the last known bugs, crazy validation constraints + when a document is standalone seems correctly handled. There + is a couple of open issues left which need consideration especially + PE93 on external unparsed entities and standalone status. + Ran 1819 tests: 1817 succeeded, 2 failed and 0 generated an error in 8.26 s. + The 2 tests left failing are actually in error. Cleanup done. + Tue Feb 19 15:17:02 CET 2002 Daniel Veillard <daniel@veillard.com> * valid.c: implemented E59 spaces in CDATA does not match the @@ -830,8 +830,13 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value) * Needed for HTML too: * http://www.w3.org/TR/html4/types.html#h-6.2 */ - nval = xmlValidNormalizeAttributeValue(ctxt->myDoc, ctxt->node, + ctxt->vctxt.valid = 1; + nval = xmlValidCtxtNormalizeAttributeValue(&ctxt->vctxt, + ctxt->myDoc, ctxt->node, fullname, value); + if (ctxt->vctxt.valid != 1) { + ctxt->valid = 0; + } if (nval != NULL) value = nval; @@ -985,16 +990,21 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value) * Check defaulted attributes from the DTD */ static void -xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt, - xmlDtdPtr dtd, const xmlChar *name, +xmlCheckDefaultedAttributes(xmlParserCtxtPtr ctxt, const xmlChar *name, const xmlChar *prefix, const xmlChar **atts) { xmlElementPtr elemDecl; const xmlChar *att; + int internal = 1; int i; - if ((dtd == NULL) || (name == NULL)) - return; - elemDecl = xmlGetDtdQElementDesc(dtd, name, prefix); + elemDecl = 
xmlGetDtdQElementDesc(ctxt->myDoc->intSubset, name, prefix); + if (elemDecl == NULL) { + elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->extSubset, name, prefix); + internal = 0; + } + +process_external_subset: + if (elemDecl != NULL) { xmlAttributePtr attr = elemDecl->attributes; /* @@ -1008,7 +1018,10 @@ xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt, if ((attr->defaultValue != NULL) && (xmlGetDtdQAttrDesc(ctxt->myDoc->extSubset, attr->elem, attr->name, - attr->prefix) == attr)) { + attr->prefix) == attr) && + (xmlGetDtdQAttrDesc(ctxt->myDoc->intSubset, + attr->elem, attr->name, + attr->prefix) == NULL)) { xmlChar *fulln; if (attr->prefix != NULL) { @@ -1039,9 +1052,7 @@ xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt, ctxt->vctxt.error(ctxt->vctxt.userData, "standalone: attribute %s on %s defaulted from external subset\n", fulln, attr->elem); - /* Waiting on the XML Core WG decision on this ctxt->valid = 0; - */ } } attr = attr->nexth; @@ -1053,7 +1064,18 @@ xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt, */ attr = elemDecl->attributes; while (attr != NULL) { - if (attr->defaultValue != NULL) { + /* + * Make sure that attribute redefinitions occurring in the + * internal subset are not overridden by definitions in the + * external subset. 
+ */ + if ((attr->defaultValue != NULL) && + (xmlGetDtdQAttrDesc(ctxt->myDoc->extSubset, + attr->elem, attr->name, + attr->prefix) == attr) && + (xmlGetDtdQAttrDesc(ctxt->myDoc->intSubset, + attr->elem, attr->name, + attr->prefix) == NULL)) { /* * the element should be instantiated in the tree if: * - this is a namespace prefix @@ -1090,13 +1112,20 @@ xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt, att = atts[i]; } } - if (att == NULL) + if (att == NULL) { attribute(ctxt, fulln, attr->defaultValue); + } xmlFree(fulln); } } attr = attr->nexth; } + if (internal == 1) { + elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->extSubset, + name, prefix); + internal = 0; + goto process_external_subset; + } } } @@ -1206,12 +1235,7 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts) if ((!ctxt->html) && ((ctxt->myDoc->intSubset != NULL) || (ctxt->myDoc->extSubset != NULL))) { - if (ctxt->myDoc->intSubset != NULL) - xmlCheckDefaultedAttributesFromDtd(ctxt, ctxt->myDoc->intSubset, - name, prefix, atts); - if (ctxt->myDoc->extSubset != NULL) - xmlCheckDefaultedAttributesFromDtd(ctxt, ctxt->myDoc->extSubset, - name, prefix, atts); + xmlCheckDefaultedAttributes(ctxt, name, prefix, atts); } /* diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h index 2b4c1a66..93e230e5 100644 --- a/include/libxml/parserInternals.h +++ b/include/libxml/parserInternals.h @@ -273,14 +273,16 @@ int xmlParseAttributeType (xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree); void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt); xmlElementContentPtr xmlParseElementMixedContentDecl - (xmlParserCtxtPtr ctxt); + (xmlParserCtxtPtr ctxt, + xmlParserInputPtr inputchk); #ifdef VMS xmlElementContentPtr xmlParseElementChildrenContentD (xmlParserCtxtPtr ctxt); #define xmlParseElementChildrenContentDecl xmlParseElementChildrenContentD #else xmlElementContentPtr xmlParseElementChildrenContentDecl - (xmlParserCtxtPtr ctxt); + (xmlParserCtxtPtr ctxt, + 
xmlParserInputPtr inputchk); #endif int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, diff --git a/include/libxml/valid.h b/include/libxml/valid.h index d49a2647..db427ff3 100644 --- a/include/libxml/valid.h +++ b/include/libxml/valid.h @@ -223,6 +223,11 @@ xmlChar * xmlValidNormalizeAttributeValue(xmlDocPtr doc, xmlNodePtr elem, const xmlChar *name, const xmlChar *value); +xmlChar * xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *name, + const xmlChar *value); int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlAttributePtr attr); @@ -4132,7 +4132,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { * returns: the list of the xmlElementContentPtr describing the element choices */ xmlElementContentPtr -xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) { +xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) { xmlElementContentPtr ret = NULL, cur = NULL, n; xmlChar *elem = NULL; @@ -4145,7 +4145,13 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) { SKIP_BLANKS; SHRINK; if (RAW == ')') { - ctxt->entity = ctxt->input; + if ((ctxt->validate) && (ctxt->input != inputchk)) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if (ctxt->vctxt.error != NULL) + ctxt->vctxt.error(ctxt->vctxt.userData, +"Element content declaration doesn't start and stop in the same entity\n"); + ctxt->valid = 0; + } NEXT; ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA); if (RAW == '*') { @@ -4203,7 +4209,13 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) { xmlFree(elem); } ret->ocur = XML_ELEMENT_CONTENT_MULT; - ctxt->entity = ctxt->input; + if ((ctxt->validate) && (ctxt->input != inputchk)) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if (ctxt->vctxt.error != NULL) + ctxt->vctxt.error(ctxt->vctxt.userData, +"Element content declaration doesn't start and stop in the same entity\n"); + ctxt->valid = 0; + } SKIP(2); } else { if (elem != 
NULL) xmlFree(elem); @@ -4273,7 +4285,7 @@ xmlParseElementChildrenContentD #else xmlParseElementChildrenContentDecl #endif -(xmlParserCtxtPtr ctxt) { +(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) { xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL; xmlChar *elem; xmlChar type = 0; @@ -4281,10 +4293,12 @@ xmlParseElementChildrenContentDecl SKIP_BLANKS; GROW; if (RAW == '(') { + xmlParserInputPtr input = ctxt->input; + /* Recurse on first child */ NEXT; SKIP_BLANKS; - cur = ret = xmlParseElementChildrenContentDecl(ctxt); + cur = ret = xmlParseElementChildrenContentDecl(ctxt, input); SKIP_BLANKS; GROW; } else { @@ -4437,10 +4451,11 @@ xmlParseElementChildrenContentDecl SKIP_BLANKS; GROW; if (RAW == '(') { + xmlParserInputPtr input = ctxt->input; /* Recurse on second child */ NEXT; SKIP_BLANKS; - last = xmlParseElementChildrenContentDecl(ctxt); + last = xmlParseElementChildrenContentDecl(ctxt, input); SKIP_BLANKS; } else { elem = xmlParseName(ctxt); @@ -4483,7 +4498,13 @@ xmlParseElementChildrenContentDecl if (last != NULL) last->parent = cur; } - ctxt->entity = ctxt->input; + if ((ctxt->validate) && (ctxt->input != inputchk)) { + ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; + if (ctxt->vctxt.error != NULL) + ctxt->vctxt.error(ctxt->vctxt.userData, +"Element content declaration doesn't start and stop in the same entity\n"); + ctxt->valid = 0; + } NEXT; if (RAW == '?') { if (ret != NULL) @@ -4583,20 +4604,12 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name, (NXT(2) == 'C') && (NXT(3) == 'D') && (NXT(4) == 'A') && (NXT(5) == 'T') && (NXT(6) == 'A')) { - tree = xmlParseElementMixedContentDecl(ctxt); + tree = xmlParseElementMixedContentDecl(ctxt, input); res = XML_ELEMENT_TYPE_MIXED; } else { - tree = xmlParseElementChildrenContentDecl(ctxt); + tree = xmlParseElementChildrenContentDecl(ctxt, input); res = XML_ELEMENT_TYPE_ELEMENT; } - if ((ctxt->entity != NULL) && (input != ctxt->entity)) { - ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; - if 
((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, -"Element content declaration doesn't start and stop in the same entity\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - } SKIP_BLANKS; *result = tree; return(res); @@ -2970,11 +2970,99 @@ xmlValidateAttributeValue2(xmlValidCtxtPtr ctxt, xmlDocPtr doc, } /** + * xmlValidCtxtNormalizeAttributeValue: + * @ctxt: the validation context + * @doc: the document + * @elem: the parent + * @name: the attribute name + * @value: the attribute value + * @ctxt: the validation context or NULL + * + * Does the validation related extra step of the normalization of attribute + * values: + * + * If the declared value is not CDATA, then the XML processor must further + * process the normalized attribute value by discarding any leading and + * trailing space (#x20) characters, and by replacing sequences of space + * (#x20) characters by single space (#x20) character. + * + * Also check VC: Standalone Document Declaration in P32, and update + * ctxt->valid accordingly + * + * returns a new normalized string if normalization is needed, NULL otherwise + * the caller must free the returned value. 
+ */ + +xmlChar * +xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem, const xmlChar *name, const xmlChar *value) { + xmlChar *ret, *dst; + const xmlChar *src; + xmlAttributePtr attrDecl = NULL; + int extsubset = 0; + + if (doc == NULL) return(NULL); + if (elem == NULL) return(NULL); + if (name == NULL) return(NULL); + if (value == NULL) return(NULL); + + if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) { + xmlChar qname[500]; + snprintf((char *) qname, sizeof(qname), "%s:%s", + elem->ns->prefix, elem->name); + qname[sizeof(qname) - 1] = 0; + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, qname, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) { + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, qname, name); + if (attrDecl != NULL) + extsubset = 1; + } + } + if ((attrDecl == NULL) && (doc->intSubset != NULL)) + attrDecl = xmlGetDtdAttrDesc(doc->intSubset, elem->name, name); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) { + attrDecl = xmlGetDtdAttrDesc(doc->extSubset, elem->name, name); + if (attrDecl != NULL) + extsubset = 1; + } + + if (attrDecl == NULL) + return(NULL); + if (attrDecl->atype == XML_ATTRIBUTE_CDATA) + return(NULL); + + ret = xmlStrdup(value); + if (ret == NULL) + return(NULL); + src = value; + dst = ret; + while (*src == 0x20) src++; + while (*src != 0) { + if (*src == 0x20) { + while (*src == 0x20) src++; + if (*src != 0) + *dst++ = 0x20; + } else { + *dst++ = *src++; + } + } + *dst = 0; + if ((doc->standalone) && (extsubset == 1) && (!xmlStrEqual(value, ret))) { + VERROR(ctxt->userData, +"standalone: %s on %s value had to be normalized based on external subset declaration\n", + name, elem->name); + ctxt->valid = 0; + } + return(ret); +} + +/** * xmlValidNormalizeAttributeValue: * @doc: the document * @elem: the parent * @name: the attribute name * @value: the attribute value + * @ctxt: the validation context or NULL * * Does the validation related extra step of the normalization of 
attribute * values: @@ -3234,7 +3322,6 @@ xmlValidateElementDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc, elem->name); ret = 0; } - /* One ID per Element Type * already done when registering the attribute if (xmlScanIDAttributeDecl(ctxt, elem) > 1) { @@ -4195,9 +4282,10 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlElementContentPtr cont; xmlAttributePtr attr; xmlNodePtr child; - int ret = 1; + int ret = 1, tmp; const xmlChar *name; const xmlChar *prefix = NULL; + int extsubset = 0; CHECK_DTD; @@ -4275,9 +4363,12 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, if (prefix != NULL) { elemDecl = xmlGetDtdQElementDesc(doc->intSubset, elem->name, prefix); - if ((elemDecl == NULL) && (doc->extSubset != NULL)) + if ((elemDecl == NULL) && (doc->extSubset != NULL)) { elemDecl = xmlGetDtdQElementDesc(doc->extSubset, elem->name, prefix); + if (elemDecl != NULL) + extsubset = 1; + } } /* @@ -4287,8 +4378,11 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, */ if (elemDecl == NULL) { elemDecl = xmlGetDtdElementDesc(doc->intSubset, elem->name); - if ((elemDecl == NULL) && (doc->extSubset != NULL)) + if ((elemDecl == NULL) && (doc->extSubset != NULL)) { elemDecl = xmlGetDtdElementDesc(doc->extSubset, elem->name); + if (elemDecl != NULL) + extsubset = 1; + } } if (elemDecl == NULL) { VERROR(ctxt->userData, "No declaration for element %s\n", @@ -4314,6 +4408,7 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, /* I don't think anything is required then */ break; case XML_ELEMENT_TYPE_MIXED: + /* simple case of declared as #PCDATA */ if ((elemDecl->content != NULL) && (elemDecl->content->type == XML_ELEMENT_CONTENT_PCDATA)) { @@ -4386,9 +4481,35 @@ child_ok: } break; case XML_ELEMENT_TYPE_ELEMENT: + if ((doc->standalone == 1) && (extsubset == 1)) { + /* + * VC: Standalone Document Declaration + * - element types with element content, if white space + * occurs directly within any instance of those types. 
+ */ + child = elem->children; + while (child != NULL) { + if (child->type == XML_TEXT_NODE) { + const xmlChar *content = child->content; + + while (IS_BLANK(*content)) + content++; + if (*content == 0) { + VERROR(ctxt->userData, +"standalone: %s declared in the external subset contains white spaces nodes\n", + elem->name); + ret = 0; + break; + } + } + child =child->next; + } + } child = elem->children; cont = elemDecl->content; - ret = xmlValidateElementContent(ctxt, child, elemDecl, 1); + tmp = xmlValidateElementContent(ctxt, child, elemDecl, 1); + if (tmp <= 0) + ret = tmp; break; } |