diff options
| author | Daniel Veillard <veillard@src.gnome.org> | 2003-08-09 11:44:50 +0000 |
|---|---|---|
| committer | Daniel Veillard <veillard@src.gnome.org> | 2003-08-09 11:44:50 +0000 |
| commit | 14f752c2b725131a24c1296109a0406fc870279c (patch) | |
| tree | d0f3dd09aa9a0455ebd67bf3174fb831b778731b /HTMLparser.c | |
| parent | 6d13f33d8791897c57599570afceb4eba092a68e (diff) | |
| download | android_external_libxml2-14f752c2b725131a24c1296109a0406fc870279c.tar.gz android_external_libxml2-14f752c2b725131a24c1296109a0406fc870279c.tar.bz2 android_external_libxml2-14f752c2b725131a24c1296109a0406fc870279c.zip | |
fixed a nasty bug #119387, bad heuristic from the progressive HTML parser
* HTMLparser.c: fixed a nasty bug #119387: a bad heuristic in
the progressive HTML parser front-end on large character data
islands led to an erroneous end of data detection by the
parser. Some cleanup too, to get closer to the XML progressive
parser.
Daniel
Diffstat (limited to 'HTMLparser.c')
| -rw-r--r-- | HTMLparser.c | 39 |
1 files changed, 25 insertions, 14 deletions
diff --git a/HTMLparser.c b/HTMLparser.c index 0aa00411..2168bbdf 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -4950,19 +4950,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { /* TODO: check generation of subtrees if noent !!! */ htmlParseReference(ctxt); } else { - /* TODO Avoid the extra copy, handle directly !!!!!! */ - /* - * Goal of the following test is: - * - minimize calls to the SAX 'character' callback - * when they are mergeable + /* + * check that the text sequence is complete + * before handing out the data to the parser + * to avoid problems with erroneous end of + * data detection. */ - if ((ctxt->inputNr == 1) && - (avail < HTML_PARSER_BIG_BUFFER_SIZE)) { - if ((!terminate) && - (htmlParseLookupSequence( - ctxt, '<', 0, 0, 0) < 0)) - goto done; - } + if ((!terminate) && + (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0)) + goto done; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5160,12 +5156,27 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); #endif +#if 0 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) htmlParseTryOrFinish(ctxt, terminate); +#endif } else if (ctxt->instate != XML_PARSER_EOF) { - xmlParserInputBufferPush(ctxt->input->buf, 0, ""); - htmlParseTryOrFinish(ctxt, terminate); + if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { + xmlParserInputBufferPtr in = ctxt->input->buf; + if ((in->encoder != NULL) && (in->buffer != NULL) && + (in->raw != NULL)) { + int nbchars; + + nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); + if (nbchars < 0) { + xmlGenericError(xmlGenericErrorContext, + "htmlParseChunk: encoder error\n"); + return(XML_ERR_INVALID_ENCODING); + } + } + } } + htmlParseTryOrFinish(ctxt, terminate); if (terminate) { if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->instate != XML_PARSER_EPILOG) && |
