aboutsummaryrefslogtreecommitdiffstats
path: root/HTMLparser.c
diff options
context:
space:
mode:
authorDaniel Veillard <veillard@src.gnome.org>2001-04-24 12:51:09 +0000
committerDaniel Veillard <veillard@src.gnome.org>2001-04-24 12:51:09 +0000
commit56098d4f350981f4249fad3e19b900b3d214cd3c (patch)
treeb99c2f17eca9f9f0061f4b1de5a214f3bdc1488b /HTMLparser.c
parent122376b8efbff2cd6b62af2dedcdea05e07f5766 (diff)
downloadandroid_external_libxml2-56098d4f350981f4249fad3e19b900b3d214cd3c.tar.gz
android_external_libxml2-56098d4f350981f4249fad3e19b900b3d214cd3c.tar.bz2
android_external_libxml2-56098d4f350981f4249fad3e19b900b3d214cd3c.zip
- HTMLparser.c : HTML parsing still sucks ... trying to deal
  with madness
- result/HTML/ : this modified the result of the regression tests a lot.
Daniel
Diffstat (limited to 'HTMLparser.c')
-rw-r--r--HTMLparser.c40
1 files changed, 25 insertions, 15 deletions
diff --git a/HTMLparser.c b/HTMLparser.c
index 1e147789..ace49d9f 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -346,6 +346,7 @@ htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
* Start Tag: 1 means the start tag can be ommited
* End Tag: 1 means the end tag can be ommited
* 2 means it's forbidden (empty elements)
+ * 3 means the tag is stylistic and should be closed easily
* Depr: this element is deprecated
* DTD: 1 means that this element is valid only in the Loose DTD
* 2 means that this element is valid only in the Frameset DTD
@@ -359,17 +360,17 @@ htmlElemDesc html40ElementTable[] = {
{ "address", 0, 0, 0, 0, 0, 0, "information on author " },
{ "applet", 0, 0, 0, 0, 1, 1, "java applet " },
{ "area", 0, 2, 2, 1, 0, 0, "client-side image map area " },
-{ "b", 0, 0, 0, 0, 0, 0, "bold text style" },
+{ "b", 0, 3, 0, 0, 0, 0, "bold text style" },
{ "base", 0, 2, 2, 1, 0, 0, "document base uri " },
{ "basefont", 0, 2, 2, 1, 1, 1, "base font size " },
{ "bdo", 0, 0, 0, 0, 0, 0, "i18n bidi over-ride " },
-{ "big", 0, 0, 0, 0, 0, 0, "large text style" },
+{ "big", 0, 3, 0, 0, 0, 0, "large text style" },
{ "blockquote", 0, 0, 0, 0, 0, 0, "long quotation " },
{ "body", 1, 1, 0, 0, 0, 0, "document body " },
{ "br", 0, 2, 2, 1, 0, 0, "forced line break " },
{ "button", 0, 0, 0, 0, 0, 0, "push button " },
{ "caption", 0, 0, 0, 0, 0, 0, "table caption " },
-{ "center", 0, 0, 0, 0, 1, 1, "shorthand for div align=center " },
+{ "center", 0, 3, 0, 0, 1, 1, "shorthand for div align=center " },
{ "cite", 0, 0, 0, 0, 0, 0, "citation" },
{ "code", 0, 0, 0, 0, 0, 0, "computer code fragment" },
{ "col", 0, 2, 2, 1, 0, 0, "table column " },
@@ -381,9 +382,9 @@ htmlElemDesc html40ElementTable[] = {
{ "div", 0, 0, 0, 0, 0, 0, "generic language/style container"},
{ "dl", 0, 0, 0, 0, 0, 0, "definition list " },
{ "dt", 0, 1, 0, 0, 0, 0, "definition term " },
-{ "em", 0, 0, 0, 0, 0, 0, "emphasis" },
+{ "em", 0, 3, 0, 0, 0, 0, "emphasis" },
{ "fieldset", 0, 0, 0, 0, 0, 0, "form control group " },
-{ "font", 0, 0, 0, 0, 1, 1, "local change to font " },
+{ "font", 0, 3, 0, 0, 1, 1, "local change to font " },
{ "form", 0, 0, 0, 0, 0, 0, "interactive form " },
{ "frame", 0, 2, 2, 1, 0, 2, "subwindow " },
{ "frameset", 0, 0, 0, 0, 0, 2, "window subdivision" },
@@ -396,7 +397,7 @@ htmlElemDesc html40ElementTable[] = {
{ "head", 1, 1, 0, 0, 0, 0, "document head " },
{ "hr", 0, 2, 2, 1, 0, 0, "horizontal rule " },
{ "html", 1, 1, 0, 0, 0, 0, "document root element " },
-{ "i", 0, 0, 0, 0, 0, 0, "italic text style" },
+{ "i", 0, 3, 0, 0, 0, 0, "italic text style" },
{ "iframe", 0, 0, 0, 0, 0, 1, "inline subwindow " },
{ "img", 0, 2, 2, 1, 0, 0, "embedded image " },
{ "input", 0, 2, 2, 1, 0, 0, "form control " },
@@ -420,17 +421,17 @@ htmlElemDesc html40ElementTable[] = {
{ "param", 0, 2, 2, 1, 0, 0, "named property value " },
{ "pre", 0, 0, 0, 0, 0, 0, "preformatted text " },
{ "q", 0, 0, 0, 0, 0, 0, "short inline quotation " },
-{ "s", 0, 0, 0, 0, 1, 1, "strike-through text style" },
+{ "s", 0, 3, 0, 0, 1, 1, "strike-through text style" },
{ "samp", 0, 0, 0, 0, 0, 0, "sample program output, scripts, etc." },
{ "script", 0, 0, 0, 0, 0, 0, "script statements " },
{ "select", 0, 0, 0, 0, 0, 0, "option selector " },
-{ "small", 0, 0, 0, 0, 0, 0, "small text style" },
+{ "small", 0, 3, 0, 0, 0, 0, "small text style" },
{ "span", 0, 0, 0, 0, 0, 0, "generic language/style container " },
-{ "strike", 0, 0, 0, 0, 1, 1, "strike-through text" },
-{ "strong", 0, 0, 0, 0, 0, 0, "strong emphasis" },
+{ "strike", 0, 3, 0, 0, 1, 1, "strike-through text" },
+{ "strong", 0, 3, 0, 0, 0, 0, "strong emphasis" },
{ "style", 0, 0, 0, 0, 0, 0, "style info " },
-{ "sub", 0, 0, 0, 0, 0, 0, "subscript" },
-{ "sup", 0, 0, 0, 0, 0, 0, "superscript " },
+{ "sub", 0, 3, 0, 0, 0, 0, "subscript" },
+{ "sup", 0, 3, 0, 0, 0, 0, "superscript " },
{ "table", 0, 0, 0, 0, 0, 0, "&#160;" },
{ "tbody", 1, 0, 0, 0, 0, 0, "table body " },
{ "td", 0, 0, 0, 0, 0, 0, "table data cell" },
@@ -440,8 +441,8 @@ htmlElemDesc html40ElementTable[] = {
{ "thead", 0, 1, 0, 0, 0, 0, "table header " },
{ "title", 0, 0, 0, 0, 0, 0, "document title " },
{ "tr", 0, 0, 0, 0, 0, 0, "table row " },
-{ "tt", 0, 0, 0, 0, 0, 0, "teletype or monospaced text style" },
-{ "u", 0, 0, 0, 0, 1, 1, "underlined text style" },
+{ "tt", 0, 3, 0, 0, 0, 0, "teletype or monospaced text style" },
+{ "u", 0, 3, 0, 0, 1, 1, "underlined text style" },
{ "ul", 0, 0, 0, 0, 0, 0, "unordered list " },
{ "var", 0, 0, 0, 0, 0, 0, "instance of a variable or program argument" },
};
@@ -682,7 +683,16 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
#ifdef DEBUG
xmlGenericError(xmlGenericErrorContext,"htmlAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
#endif
- } else {
+ } else if (info->endTag == 3) {
+#ifdef DEBUG
+ xmlGenericError(xmlGenericErrorContext,"End of tag %s: expecting %s\n", name, ctxt->name);
+#endif
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "Opening and ending tag mismatch: %s and %s\n",
+ newtag, ctxt->name);
+ ctxt->wellFormed = 0;
+ } else {
return;
}
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))