aboutsummaryrefslogtreecommitdiffstats
path: root/HTMLparser.c
diff options
context:
space:
mode:
author: Daniel Veillard <veillard@src.gnome.org>, 2003-08-09 11:44:50 +0000
committer: Daniel Veillard <veillard@src.gnome.org>, 2003-08-09 11:44:50 +0000
commit: 14f752c2b725131a24c1296109a0406fc870279c (patch)
tree: d0f3dd09aa9a0455ebd67bf3174fb831b778731b /HTMLparser.c
parent: 6d13f33d8791897c57599570afceb4eba092a68e (diff)
download: android_external_libxml2-14f752c2b725131a24c1296109a0406fc870279c.tar.gz
android_external_libxml2-14f752c2b725131a24c1296109a0406fc870279c.tar.bz2
android_external_libxml2-14f752c2b725131a24c1296109a0406fc870279c.zip
fixed a nasty bug #119387, bad heuristic from the progressive HTML parser
* HTMLparser.c: fixed a nasty bug, #119387: a bad heuristic in the progressive HTML parser front-end, triggered by large character data islands, led to an erroneous end-of-data detection by the parser. Also did some cleanup to bring the code closer to the XML progressive parser. Daniel
Diffstat (limited to 'HTMLparser.c')
-rw-r--r-- HTMLparser.c 39
1 files changed, 25 insertions, 14 deletions
diff --git a/HTMLparser.c b/HTMLparser.c
index 0aa00411..2168bbdf 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4950,19 +4950,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
/* TODO: check generation of subtrees if noent !!! */
htmlParseReference(ctxt);
} else {
- /* TODO Avoid the extra copy, handle directly !!!!!! */
- /*
- * Goal of the following test is:
- * - minimize calls to the SAX 'character' callback
- * when they are mergeable
+ /*
+ * check that the text sequence is complete
+ * before handing out the data to the parser
+ * to avoid problems with erroneous end of
+ * data detection.
*/
- if ((ctxt->inputNr == 1) &&
- (avail < HTML_PARSER_BIG_BUFFER_SIZE)) {
- if ((!terminate) &&
- (htmlParseLookupSequence(
- ctxt, '<', 0, 0, 0) < 0))
- goto done;
- }
+ if ((!terminate) &&
+ (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
+ goto done;
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5160,12 +5156,27 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
#endif
+#if 0
if ((terminate) || (ctxt->input->buf->buffer->use > 80))
htmlParseTryOrFinish(ctxt, terminate);
+#endif
} else if (ctxt->instate != XML_PARSER_EOF) {
- xmlParserInputBufferPush(ctxt->input->buf, 0, "");
- htmlParseTryOrFinish(ctxt, terminate);
+ if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
+ xmlParserInputBufferPtr in = ctxt->input->buf;
+ if ((in->encoder != NULL) && (in->buffer != NULL) &&
+ (in->raw != NULL)) {
+ int nbchars;
+
+ nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
+ if (nbchars < 0) {
+ xmlGenericError(xmlGenericErrorContext,
+ "htmlParseChunk: encoder error\n");
+ return(XML_ERR_INVALID_ENCODING);
+ }
+ }
+ }
}
+ htmlParseTryOrFinish(ctxt, terminate);
if (terminate) {
if ((ctxt->instate != XML_PARSER_EOF) &&
(ctxt->instate != XML_PARSER_EPILOG) &&