aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog16
-rw-r--r--SAX.c58
-rw-r--r--include/libxml/parserInternals.h6
-rw-r--r--include/libxml/valid.h5
-rw-r--r--parser.c47
-rw-r--r--valid.c131
6 files changed, 222 insertions, 41 deletions
diff --git a/ChangeLog b/ChangeLog
index 6ccf4baa..b099f073 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+Tue Feb 19 22:01:35 CET 2002 Daniel Veillard <daniel@veillard.com>
+
+ * include/libxml/parserInternals.h parser.c: had to change
+ 2 internal parsing APIs when processing document content
+ to check that the start and end of element content are defined
+ in the same entity
+ * valid.c include/libxml/valid.h: attribute normalization can
+ generate a validity error, added xmlValidCtxtNormalizeAttributeValue()
+ with the context to report it.
+ * SAX.c: fixed the last known bugs, crazy validation constraints
+ when a document is standalone seem correctly handled. There
+ are a couple of open issues left which need consideration, especially
+ PE93 on external unparsed entities and standalone status.
+ Ran 1819 tests: 1817 succeeded, 2 failed and 0 generated an error in 8.26 s.
+ The 2 tests left failing are actually in error. Cleanup done.
+
Tue Feb 19 15:17:02 CET 2002 Daniel Veillard <daniel@veillard.com>
* valid.c: implemented E59 spaces in CDATA does not match the
diff --git a/SAX.c b/SAX.c
index cbf21cf2..2665998c 100644
--- a/SAX.c
+++ b/SAX.c
@@ -830,8 +830,13 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
* Needed for HTML too:
* http://www.w3.org/TR/html4/types.html#h-6.2
*/
- nval = xmlValidNormalizeAttributeValue(ctxt->myDoc, ctxt->node,
+ ctxt->vctxt.valid = 1;
+ nval = xmlValidCtxtNormalizeAttributeValue(&ctxt->vctxt,
+ ctxt->myDoc, ctxt->node,
fullname, value);
+ if (ctxt->vctxt.valid != 1) {
+ ctxt->valid = 0;
+ }
if (nval != NULL)
value = nval;
@@ -985,16 +990,21 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
* Check defaulted attributes from the DTD
*/
static void
-xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt,
- xmlDtdPtr dtd, const xmlChar *name,
+xmlCheckDefaultedAttributes(xmlParserCtxtPtr ctxt, const xmlChar *name,
const xmlChar *prefix, const xmlChar **atts) {
xmlElementPtr elemDecl;
const xmlChar *att;
+ int internal = 1;
int i;
- if ((dtd == NULL) || (name == NULL))
- return;
- elemDecl = xmlGetDtdQElementDesc(dtd, name, prefix);
+ elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->intSubset, name, prefix);
+ if (elemDecl == NULL) {
+ elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->extSubset, name, prefix);
+ internal = 0;
+ }
+
+process_external_subset:
+
if (elemDecl != NULL) {
xmlAttributePtr attr = elemDecl->attributes;
/*
@@ -1008,7 +1018,10 @@ xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt,
if ((attr->defaultValue != NULL) &&
(xmlGetDtdQAttrDesc(ctxt->myDoc->extSubset,
attr->elem, attr->name,
- attr->prefix) == attr)) {
+ attr->prefix) == attr) &&
+ (xmlGetDtdQAttrDesc(ctxt->myDoc->intSubset,
+ attr->elem, attr->name,
+ attr->prefix) == NULL)) {
xmlChar *fulln;
if (attr->prefix != NULL) {
@@ -1039,9 +1052,7 @@ xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt,
ctxt->vctxt.error(ctxt->vctxt.userData,
"standalone: attribute %s on %s defaulted from external subset\n",
fulln, attr->elem);
- /* Waiting on the XML Core WG decision on this
ctxt->valid = 0;
- */
}
}
attr = attr->nexth;
@@ -1053,7 +1064,18 @@ xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt,
*/
attr = elemDecl->attributes;
while (attr != NULL) {
- if (attr->defaultValue != NULL) {
+ /*
+ * Make sure that attribute redefinitions occurring in the
+ * internal subset are not overridden by definitions in the
+ * external subset.
+ */
+ if ((attr->defaultValue != NULL) &&
+ (xmlGetDtdQAttrDesc(ctxt->myDoc->extSubset,
+ attr->elem, attr->name,
+ attr->prefix) == attr) &&
+ (xmlGetDtdQAttrDesc(ctxt->myDoc->intSubset,
+ attr->elem, attr->name,
+ attr->prefix) == NULL)) {
/*
* the element should be instantiated in the tree if:
* - this is a namespace prefix
@@ -1090,13 +1112,20 @@ xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt,
att = atts[i];
}
}
- if (att == NULL)
+ if (att == NULL) {
attribute(ctxt, fulln, attr->defaultValue);
+ }
xmlFree(fulln);
}
}
attr = attr->nexth;
}
+ if (internal == 1) {
+ elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->extSubset,
+ name, prefix);
+ internal = 0;
+ goto process_external_subset;
+ }
}
}
@@ -1206,12 +1235,7 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
if ((!ctxt->html) &&
((ctxt->myDoc->intSubset != NULL) ||
(ctxt->myDoc->extSubset != NULL))) {
- if (ctxt->myDoc->intSubset != NULL)
- xmlCheckDefaultedAttributesFromDtd(ctxt, ctxt->myDoc->intSubset,
- name, prefix, atts);
- if (ctxt->myDoc->extSubset != NULL)
- xmlCheckDefaultedAttributesFromDtd(ctxt, ctxt->myDoc->extSubset,
- name, prefix, atts);
+ xmlCheckDefaultedAttributes(ctxt, name, prefix, atts);
}
/*
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index 2b4c1a66..93e230e5 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -273,14 +273,16 @@ int xmlParseAttributeType (xmlParserCtxtPtr ctxt,
xmlEnumerationPtr *tree);
void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
xmlElementContentPtr xmlParseElementMixedContentDecl
- (xmlParserCtxtPtr ctxt);
+ (xmlParserCtxtPtr ctxt,
+ xmlParserInputPtr inputchk);
#ifdef VMS
xmlElementContentPtr xmlParseElementChildrenContentD
(xmlParserCtxtPtr ctxt);
#define xmlParseElementChildrenContentDecl xmlParseElementChildrenContentD
#else
xmlElementContentPtr xmlParseElementChildrenContentDecl
- (xmlParserCtxtPtr ctxt);
+ (xmlParserCtxtPtr ctxt,
+ xmlParserInputPtr inputchk);
#endif
int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
xmlChar *name,
diff --git a/include/libxml/valid.h b/include/libxml/valid.h
index d49a2647..db427ff3 100644
--- a/include/libxml/valid.h
+++ b/include/libxml/valid.h
@@ -223,6 +223,11 @@ xmlChar * xmlValidNormalizeAttributeValue(xmlDocPtr doc,
xmlNodePtr elem,
const xmlChar *name,
const xmlChar *value);
+xmlChar * xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt,
+ xmlDocPtr doc,
+ xmlNodePtr elem,
+ const xmlChar *name,
+ const xmlChar *value);
int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt,
xmlDocPtr doc,
xmlAttributePtr attr);
diff --git a/parser.c b/parser.c
index 6d6df5b8..09833e0c 100644
--- a/parser.c
+++ b/parser.c
@@ -4132,7 +4132,7 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
* returns: the list of the xmlElementContentPtr describing the element choices
*/
xmlElementContentPtr
-xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
+xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
xmlElementContentPtr ret = NULL, cur = NULL, n;
xmlChar *elem = NULL;
@@ -4145,7 +4145,13 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
SKIP_BLANKS;
SHRINK;
if (RAW == ')') {
- ctxt->entity = ctxt->input;
+ if ((ctxt->validate) && (ctxt->input != inputchk)) {
+ ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
+ if (ctxt->vctxt.error != NULL)
+ ctxt->vctxt.error(ctxt->vctxt.userData,
+"Element content declaration doesn't start and stop in the same entity\n");
+ ctxt->valid = 0;
+ }
NEXT;
ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
if (RAW == '*') {
@@ -4203,7 +4209,13 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
xmlFree(elem);
}
ret->ocur = XML_ELEMENT_CONTENT_MULT;
- ctxt->entity = ctxt->input;
+ if ((ctxt->validate) && (ctxt->input != inputchk)) {
+ ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
+ if (ctxt->vctxt.error != NULL)
+ ctxt->vctxt.error(ctxt->vctxt.userData,
+"Element content declaration doesn't start and stop in the same entity\n");
+ ctxt->valid = 0;
+ }
SKIP(2);
} else {
if (elem != NULL) xmlFree(elem);
@@ -4273,7 +4285,7 @@ xmlParseElementChildrenContentD
#else
xmlParseElementChildrenContentDecl
#endif
-(xmlParserCtxtPtr ctxt) {
+(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
xmlChar *elem;
xmlChar type = 0;
@@ -4281,10 +4293,12 @@ xmlParseElementChildrenContentDecl
SKIP_BLANKS;
GROW;
if (RAW == '(') {
+ xmlParserInputPtr input = ctxt->input;
+
/* Recurse on first child */
NEXT;
SKIP_BLANKS;
- cur = ret = xmlParseElementChildrenContentDecl(ctxt);
+ cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
SKIP_BLANKS;
GROW;
} else {
@@ -4437,10 +4451,11 @@ xmlParseElementChildrenContentDecl
SKIP_BLANKS;
GROW;
if (RAW == '(') {
+ xmlParserInputPtr input = ctxt->input;
/* Recurse on second child */
NEXT;
SKIP_BLANKS;
- last = xmlParseElementChildrenContentDecl(ctxt);
+ last = xmlParseElementChildrenContentDecl(ctxt, input);
SKIP_BLANKS;
} else {
elem = xmlParseName(ctxt);
@@ -4483,7 +4498,13 @@ xmlParseElementChildrenContentDecl
if (last != NULL)
last->parent = cur;
}
- ctxt->entity = ctxt->input;
+ if ((ctxt->validate) && (ctxt->input != inputchk)) {
+ ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
+ if (ctxt->vctxt.error != NULL)
+ ctxt->vctxt.error(ctxt->vctxt.userData,
+"Element content declaration doesn't start and stop in the same entity\n");
+ ctxt->valid = 0;
+ }
NEXT;
if (RAW == '?') {
if (ret != NULL)
@@ -4583,20 +4604,12 @@ xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
(NXT(2) == 'C') && (NXT(3) == 'D') &&
(NXT(4) == 'A') && (NXT(5) == 'T') &&
(NXT(6) == 'A')) {
- tree = xmlParseElementMixedContentDecl(ctxt);
+ tree = xmlParseElementMixedContentDecl(ctxt, input);
res = XML_ELEMENT_TYPE_MIXED;
} else {
- tree = xmlParseElementChildrenContentDecl(ctxt);
+ tree = xmlParseElementChildrenContentDecl(ctxt, input);
res = XML_ELEMENT_TYPE_ELEMENT;
}
- if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
- ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
-"Element content declaration doesn't start and stop in the same entity\n");
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
- }
SKIP_BLANKS;
*result = tree;
return(res);
diff --git a/valid.c b/valid.c
index 79a35122..c6db9e48 100644
--- a/valid.c
+++ b/valid.c
@@ -2970,11 +2970,99 @@ xmlValidateAttributeValue2(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
}
/**
+ * xmlValidCtxtNormalizeAttributeValue:
+ * @ctxt: the validation context; it is dereferenced without a NULL check
+ *        when a standalone violation is reported
+ * @doc: the document
+ * @elem: the parent
+ * @name: the attribute name
+ * @value: the attribute value
+ *
+ * Does the validation related extra step of the normalization of attribute
+ * values:
+ *
+ * If the declared value is not CDATA, then the XML processor must further
+ * process the normalized attribute value by discarding any leading and
+ * trailing space (#x20) characters, and by replacing sequences of space
+ * (#x20) characters by single space (#x20) character.
+ *
+ * The attribute declaration is searched in the internal subset first and
+ * in the external subset only when the internal lookup finds nothing.
+ *
+ * Also check VC: Standalone Document Declaration in P32: when the document
+ * is standalone, the declaration came from the external subset and the
+ * normalized string differs from @value, an error is reported through
+ * VERROR and ctxt->valid is set to 0.
+ *
+ * returns a newly allocated normalized copy of @value whenever a non-CDATA
+ * declaration is found for the attribute (even if the copy is identical to
+ * @value); NULL if an argument other than @ctxt is NULL, if no declaration
+ * is found, if the declared type is CDATA, or if xmlStrdup() returns NULL.
+ * The caller must free the returned value.
+ */
+
+xmlChar *
+xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+ xmlNodePtr elem, const xmlChar *name, const xmlChar *value) {
+ xmlChar *ret, *dst;
+ const xmlChar *src;
+ xmlAttributePtr attrDecl = NULL;
+ int extsubset = 0; /* 1 when the declaration was found in the external subset */
+
+ if (doc == NULL) return(NULL);
+ if (elem == NULL) return(NULL);
+ if (name == NULL) return(NULL);
+ if (value == NULL) return(NULL);
+
+ if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) { /* prefixed element: try "prefix:name" first */
+ xmlChar qname[500]; /* fixed size: a longer qualified name is truncated below */
+ snprintf((char *) qname, sizeof(qname), "%s:%s",
+ elem->ns->prefix, elem->name);
+ qname[sizeof(qname) - 1] = 0;
+ attrDecl = xmlGetDtdAttrDesc(doc->intSubset, qname, name); /* NOTE(review): intSubset is not NULL-checked here, unlike the unprefixed lookup below - confirm xmlGetDtdAttrDesc accepts a NULL dtd */
+ if ((attrDecl == NULL) && (doc->extSubset != NULL)) {
+ attrDecl = xmlGetDtdAttrDesc(doc->extSubset, qname, name);
+ if (attrDecl != NULL)
+ extsubset = 1;
+ }
+ }
+ if ((attrDecl == NULL) && (doc->intSubset != NULL)) /* fall back to the unprefixed element name */
+ attrDecl = xmlGetDtdAttrDesc(doc->intSubset, elem->name, name);
+ if ((attrDecl == NULL) && (doc->extSubset != NULL)) {
+ attrDecl = xmlGetDtdAttrDesc(doc->extSubset, elem->name, name);
+ if (attrDecl != NULL)
+ extsubset = 1;
+ }
+
+ if (attrDecl == NULL) /* undeclared attribute: nothing to normalize */
+ return(NULL);
+ if (attrDecl->atype == XML_ATTRIBUTE_CDATA) /* CDATA values get no extra normalization */
+ return(NULL);
+
+ ret = xmlStrdup(value); /* output buffer: the loop below writes at most one char per char read */
+ if (ret == NULL)
+ return(NULL);
+ src = value;
+ dst = ret;
+ while (*src == 0x20) src++; /* drop leading spaces */
+ while (*src != 0) {
+ if (*src == 0x20) {
+ while (*src == 0x20) src++; /* collapse a run of spaces */
+ if (*src != 0) /* emit a single space only if more text follows, dropping trailing spaces */
+ *dst++ = 0x20;
+ } else {
+ *dst++ = *src++;
+ }
+ }
+ *dst = 0;
+ if ((doc->standalone) && (extsubset == 1) && (!xmlStrEqual(value, ret))) { /* value changed because of an external-subset declaration */
+ VERROR(ctxt->userData,
+"standalone: %s on %s value had to be normalized based on external subset declaration\n",
+ name, elem->name);
+ ctxt->valid = 0; /* unconditional write through ctxt: it must not be NULL on this path */
+ }
+ return(ret);
+}
+
+/**
* xmlValidNormalizeAttributeValue:
* @doc: the document
* @elem: the parent
* @name: the attribute name
* @value: the attribute value
*
* Does the validation related extra step of the normalization of attribute
* values:
@@ -3234,7 +3322,6 @@ xmlValidateElementDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
elem->name);
ret = 0;
}
-
/* One ID per Element Type
* already done when registering the attribute
if (xmlScanIDAttributeDecl(ctxt, elem) > 1) {
@@ -4195,9 +4282,10 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
xmlElementContentPtr cont;
xmlAttributePtr attr;
xmlNodePtr child;
- int ret = 1;
+ int ret = 1, tmp;
const xmlChar *name;
const xmlChar *prefix = NULL;
+ int extsubset = 0;
CHECK_DTD;
@@ -4275,9 +4363,12 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
if (prefix != NULL) {
elemDecl = xmlGetDtdQElementDesc(doc->intSubset,
elem->name, prefix);
- if ((elemDecl == NULL) && (doc->extSubset != NULL))
+ if ((elemDecl == NULL) && (doc->extSubset != NULL)) {
elemDecl = xmlGetDtdQElementDesc(doc->extSubset,
elem->name, prefix);
+ if (elemDecl != NULL)
+ extsubset = 1;
+ }
}
/*
@@ -4287,8 +4378,11 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
*/
if (elemDecl == NULL) {
elemDecl = xmlGetDtdElementDesc(doc->intSubset, elem->name);
- if ((elemDecl == NULL) && (doc->extSubset != NULL))
+ if ((elemDecl == NULL) && (doc->extSubset != NULL)) {
elemDecl = xmlGetDtdElementDesc(doc->extSubset, elem->name);
+ if (elemDecl != NULL)
+ extsubset = 1;
+ }
}
if (elemDecl == NULL) {
VERROR(ctxt->userData, "No declaration for element %s\n",
@@ -4314,6 +4408,7 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
/* I don't think anything is required then */
break;
case XML_ELEMENT_TYPE_MIXED:
+
/* simple case of declared as #PCDATA */
if ((elemDecl->content != NULL) &&
(elemDecl->content->type == XML_ELEMENT_CONTENT_PCDATA)) {
@@ -4386,9 +4481,35 @@ child_ok:
}
break;
case XML_ELEMENT_TYPE_ELEMENT:
+ if ((doc->standalone == 1) && (extsubset == 1)) {
+ /*
+ * VC: Standalone Document Declaration
+ * - element types with element content, if white space
+ * occurs directly within any instance of those types.
+ */
+ child = elem->children;
+ while (child != NULL) {
+ if (child->type == XML_TEXT_NODE) {
+ const xmlChar *content = child->content;
+
+ while (IS_BLANK(*content))
+ content++;
+ if (*content == 0) {
+ VERROR(ctxt->userData,
+"standalone: %s declared in the external subset contains white spaces nodes\n",
+ elem->name);
+ ret = 0;
+ break;
+ }
+ }
+ child =child->next;
+ }
+ }
child = elem->children;
cont = elemDecl->content;
- ret = xmlValidateElementContent(ctxt, child, elemDecl, 1);
+ tmp = xmlValidateElementContent(ctxt, child, elemDecl, 1);
+ if (tmp <= 0)
+ ret = tmp;
break;
}