aboutsummaryrefslogtreecommitdiffstats
path: root/HTMLparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'HTMLparser.c')
-rw-r--r--HTMLparser.c682
1 files changed, 320 insertions, 362 deletions
diff --git a/HTMLparser.c b/HTMLparser.c
index 9e60e27e..b9812985 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -69,7 +69,7 @@ static void htmlParseComment(htmlParserCtxtPtr ctxt);
/**
* htmlErrMemory:
* @ctxt: an HTML parser context
- * @extra: extra informations
+ * @extra: extra information
*
* Handle a redefinition of attribute error
*/
@@ -317,7 +317,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
#define SKIP_BLANKS htmlSkipBlankChars(ctxt)
-/* Inported from XML */
+/* Imported from XML */
/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
@@ -414,6 +414,10 @@ htmlFindEncoding(xmlParserCtxtPtr ctxt) {
static int
htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
+ const unsigned char *cur;
+ unsigned char c;
+ unsigned int val;
+
if (ctxt->instate == XML_PARSER_EOF)
return(0);
@@ -421,99 +425,29 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
*len = 0;
return(ctxt->token);
}
- if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
- /*
- * We are supposed to handle UTF8, check it's valid
- * From rfc2044: encoding of the Unicode values on UTF-8:
- *
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
- * 0000 0000-0000 007F 0xxxxxxx
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
- *
- * Check for the 0x110000 limit too
- */
- const unsigned char *cur = ctxt->input->cur;
- unsigned char c;
- unsigned int val;
-
- c = *cur;
- if (c & 0x80) {
- if (cur[1] == 0) {
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- cur = ctxt->input->cur;
- }
- if ((cur[1] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xe0) == 0xe0) {
+ if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
+ xmlChar * guess;
+ xmlCharEncodingHandlerPtr handler;
- if (cur[2] == 0) {
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- cur = ctxt->input->cur;
- }
- if ((cur[2] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xf0) == 0xf0) {
- if (cur[3] == 0) {
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
- cur = ctxt->input->cur;
- }
- if (((c & 0xf8) != 0xf0) ||
- ((cur[3] & 0xc0) != 0x80))
- goto encoding_error;
- /* 4-byte code */
- *len = 4;
- val = (cur[0] & 0x7) << 18;
- val |= (cur[1] & 0x3f) << 12;
- val |= (cur[2] & 0x3f) << 6;
- val |= cur[3] & 0x3f;
- } else {
- /* 3-byte code */
- *len = 3;
- val = (cur[0] & 0xf) << 12;
- val |= (cur[1] & 0x3f) << 6;
- val |= cur[2] & 0x3f;
- }
- } else {
- /* 2-byte code */
- *len = 2;
- val = (cur[0] & 0x1f) << 6;
- val |= cur[1] & 0x3f;
- }
- if (!IS_CHAR(val)) {
- htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
- "Char 0x%X out of allowed range\n", val);
- }
- return(val);
- } else {
+ /*
+ * Assume it's a fixed length encoding (1) with
+ * a compatible encoding for the ASCII set, since
+ * HTML constructs only use < 128 chars
+ */
+ if ((int) *ctxt->input->cur < 0x80) {
+ *len = 1;
if ((*ctxt->input->cur == 0) &&
(ctxt->input->cur < ctxt->input->end)) {
- htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
- "Char 0x%X out of allowed range\n", 0);
- *len = 1;
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Char 0x%X out of allowed range\n", 0);
return(' ');
}
- /* 1-byte code */
- *len = 1;
- return((int) *ctxt->input->cur);
- }
- }
- /*
- * Assume it's a fixed length encoding (1) with
- * a compatible encoding for the ASCII set, since
- * XML constructs only use < 128 chars
- */
- *len = 1;
- if ((int) *ctxt->input->cur < 0x80)
- return((int) *ctxt->input->cur);
-
- /*
- * Humm this is bad, do an automatic flow conversion
- */
- {
- xmlChar * guess;
- xmlCharEncodingHandlerPtr handler;
+ return((int) *ctxt->input->cur);
+ }
+ /*
+ * Humm this is bad, do an automatic flow conversion
+ */
guess = htmlFindEncoding(ctxt);
if (guess == NULL) {
xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
@@ -532,12 +466,91 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
ctxt->charset = XML_CHAR_ENCODING_UTF8;
}
- return(xmlCurrentChar(ctxt, len));
+ /*
+ * We are supposed to handle UTF8, check it's valid
+ * From rfc2044: encoding of the Unicode values on UTF-8:
+ *
+ * UCS-4 range (hex.) UTF-8 octet sequence (binary)
+ * 0000 0000-0000 007F 0xxxxxxx
+ * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
+ * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+ *
+ * Check for the 0x110000 limit too
+ */
+ cur = ctxt->input->cur;
+ c = *cur;
+ if (c & 0x80) {
+ if ((c & 0x40) == 0)
+ goto encoding_error;
+ if (cur[1] == 0) {
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
+ if ((cur[1] & 0xc0) != 0x80)
+ goto encoding_error;
+ if ((c & 0xe0) == 0xe0) {
+
+ if (cur[2] == 0) {
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
+ if ((cur[2] & 0xc0) != 0x80)
+ goto encoding_error;
+ if ((c & 0xf0) == 0xf0) {
+ if (cur[3] == 0) {
+ xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+ cur = ctxt->input->cur;
+ }
+ if (((c & 0xf8) != 0xf0) ||
+ ((cur[3] & 0xc0) != 0x80))
+ goto encoding_error;
+ /* 4-byte code */
+ *len = 4;
+ val = (cur[0] & 0x7) << 18;
+ val |= (cur[1] & 0x3f) << 12;
+ val |= (cur[2] & 0x3f) << 6;
+ val |= cur[3] & 0x3f;
+ if (val < 0x10000)
+ goto encoding_error;
+ } else {
+ /* 3-byte code */
+ *len = 3;
+ val = (cur[0] & 0xf) << 12;
+ val |= (cur[1] & 0x3f) << 6;
+ val |= cur[2] & 0x3f;
+ if (val < 0x800)
+ goto encoding_error;
+ }
+ } else {
+ /* 2-byte code */
+ *len = 2;
+ val = (cur[0] & 0x1f) << 6;
+ val |= cur[1] & 0x3f;
+ if (val < 0x80)
+ goto encoding_error;
+ }
+ if (!IS_CHAR(val)) {
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Char 0x%X out of allowed range\n", val);
+ }
+ return(val);
+ } else {
+ if ((*ctxt->input->cur == 0) &&
+ (ctxt->input->cur < ctxt->input->end)) {
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Char 0x%X out of allowed range\n", 0);
+ *len = 1;
+ return(' ');
+ }
+ /* 1-byte code */
+ *len = 1;
+ return((int) *ctxt->input->cur);
+ }
encoding_error:
/*
* If we detect an UTF8 error that probably mean that the
- * input encoding didn't get properly advertized in the
+ * input encoding didn't get properly advertised in the
* declaration header. Report the error and switch the encoding
* to ISO-Latin-1 (if you don't like this policy, just declare the
* encoding !)
@@ -602,8 +615,8 @@ htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
************************************************************************/
/*
- * Start Tag: 1 means the start tag can be ommited
- * End Tag: 1 means the end tag can be ommited
+ * Start Tag: 1 means the start tag can be omitted
+ * End Tag: 1 means the end tag can be omitted
* 2 means it's forbidden (empty elements)
* 3 means the tag is stylistic and should be closed easily
* Depr: this element is deprecated
@@ -1342,7 +1355,7 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
if (xmlStrEqual(newtag, ctxt->nameTab[i]))
break;
/*
- * A missplaced endtag can only close elements with lower
+ * A misplaced endtag can only close elements with lower
* or equal priority, so if we find an element with higher
* priority before we find an element with
* matching name, we just ignore this endtag
@@ -2176,6 +2189,7 @@ htmlEncodeEntities(unsigned char* out, int *outlen,
* *
************************************************************************/
+#ifdef LIBXML_PUSH_ENABLED
/**
* htmlNewInputStream:
* @ctxt: an HTML parser context
@@ -2207,6 +2221,7 @@ htmlNewInputStream(htmlParserCtxtPtr ctxt) {
input->length = 0;
return(input);
}
+#endif
/************************************************************************
@@ -2216,9 +2231,9 @@ htmlNewInputStream(htmlParserCtxtPtr ctxt) {
************************************************************************/
/*
* all tags allowing pc data from the html 4.01 loose dtd
- * NOTE: it might be more apropriate to integrate this information
+ * NOTE: it might be more appropriate to integrate this information
* into the html40ElementTable array but I don't want to risk any
- * binary incomptibility
+ * binary incompatibility
*/
static const char *allowPCData[] = {
"a", "abbr", "acronym", "address", "applet", "b", "bdo", "big",
@@ -2787,47 +2802,39 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) {
static xmlChar *
htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
size_t len = 0, startPosition = 0;
+ int err = 0;
+ int quote;
xmlChar *ret = NULL;
- if (CUR == '"') {
- NEXT;
+ if ((CUR != '"') && (CUR != '\'')) {
+ htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
+ "SystemLiteral \" or ' expected\n", NULL, NULL);
+ return(NULL);
+ }
+ quote = CUR;
+ NEXT;
- if (CUR_PTR < BASE_PTR)
- return(ret);
- startPosition = CUR_PTR - BASE_PTR;
+ if (CUR_PTR < BASE_PTR)
+ return(ret);
+ startPosition = CUR_PTR - BASE_PTR;
- while ((IS_CHAR_CH(CUR)) && (CUR != '"')) {
- NEXT;
- len++;
- }
- if (!IS_CHAR_CH(CUR)) {
- htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
- "Unfinished SystemLiteral\n", NULL, NULL);
- } else {
- ret = xmlStrndup((BASE_PTR+startPosition), len);
- NEXT;
+ while ((CUR != 0) && (CUR != quote)) {
+ /* TODO: Handle UTF-8 */
+ if (!IS_CHAR_CH(CUR)) {
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Invalid char in SystemLiteral 0x%X\n", CUR);
+ err = 1;
}
- } else if (CUR == '\'') {
NEXT;
-
- if (CUR_PTR < BASE_PTR)
- return(ret);
- startPosition = CUR_PTR - BASE_PTR;
-
- while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) {
- NEXT;
- len++;
- }
- if (!IS_CHAR_CH(CUR)) {
- htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
- "Unfinished SystemLiteral\n", NULL, NULL);
- } else {
- ret = xmlStrndup((BASE_PTR+startPosition), len);
- NEXT;
- }
+ len++;
+ }
+ if (CUR != quote) {
+ htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
+ "Unfinished SystemLiteral\n", NULL, NULL);
} else {
- htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
- " or ' expected\n", NULL, NULL);
+ NEXT;
+ if (err == 0)
+ ret = xmlStrndup((BASE_PTR+startPosition), len);
}
return(ret);
@@ -2847,51 +2854,42 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
static xmlChar *
htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
size_t len = 0, startPosition = 0;
+ int err = 0;
+ int quote;
xmlChar *ret = NULL;
+
+ if ((CUR != '"') && (CUR != '\'')) {
+ htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
+ "PubidLiteral \" or ' expected\n", NULL, NULL);
+ return(NULL);
+ }
+ quote = CUR;
+ NEXT;
+
/*
* Name ::= (Letter | '_') (NameChar)*
*/
- if (CUR == '"') {
- NEXT;
-
- if (CUR_PTR < BASE_PTR)
- return(ret);
- startPosition = CUR_PTR - BASE_PTR;
-
- while (IS_PUBIDCHAR_CH(CUR)) {
- len++;
- NEXT;
+ if (CUR_PTR < BASE_PTR)
+ return(ret);
+ startPosition = CUR_PTR - BASE_PTR;
+
+ while ((CUR != 0) && (CUR != quote)) {
+ if (!IS_PUBIDCHAR_CH(CUR)) {
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Invalid char in PubidLiteral 0x%X\n", CUR);
+ err = 1;
}
-
- if (CUR != '"') {
- htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
- "Unfinished PubidLiteral\n", NULL, NULL);
- } else {
- ret = xmlStrndup((BASE_PTR + startPosition), len);
- NEXT;
- }
- } else if (CUR == '\'') {
+ len++;
NEXT;
+ }
- if (CUR_PTR < BASE_PTR)
- return(ret);
- startPosition = CUR_PTR - BASE_PTR;
-
- while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')){
- len++;
- NEXT;
- }
-
- if (CUR != '\'') {
- htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
- "Unfinished PubidLiteral\n", NULL, NULL);
- } else {
- ret = xmlStrndup((BASE_PTR + startPosition), len);
- NEXT;
- }
+ if (CUR != '"') {
+ htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
+ "Unfinished PubidLiteral\n", NULL, NULL);
} else {
- htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
- "PubidLiteral \" or ' expected\n", NULL, NULL);
+ NEXT;
+ if (err == 0)
+ ret = xmlStrndup((BASE_PTR + startPosition), len);
}
return(ret);
@@ -2926,7 +2924,7 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
SHRINK;
cur = CUR_CHAR(l);
- while (IS_CHAR_CH(cur)) {
+ while (cur != 0) {
if ((cur == '<') && (NXT(1) == '/')) {
/*
* One should break here, the specification is clear:
@@ -2957,8 +2955,14 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
}
}
}
- COPY_BUF(l,buf,nbchar,cur);
+ if (IS_CHAR(cur)) {
+ COPY_BUF(l,buf,nbchar,cur);
+ } else {
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Invalid char in CDATA 0x%X\n", cur);
+ }
if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
+ buf[nbchar] = 0;
if (ctxt->sax->cdataBlock!= NULL) {
/*
* Insert as CDATA, which is the same as HTML_PRESERVE_NODE
@@ -2974,15 +2978,8 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
cur = CUR_CHAR(l);
}
- if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
- htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
- "Invalid char in CDATA 0x%X\n", cur);
- if (ctxt->input->cur < ctxt->input->end) {
- NEXT;
- }
- }
-
if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
+ buf[nbchar] = 0;
if (ctxt->sax->cdataBlock!= NULL) {
/*
* Insert as CDATA, which is the same as HTML_PRESERVE_NODE
@@ -3028,6 +3025,8 @@ htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, int readahead) {
COPY_BUF(l,buf,nbchar,cur);
}
if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
+ buf[nbchar] = 0;
+
/*
* Ok the segment is to be consumed as chars.
*/
@@ -3226,7 +3225,7 @@ htmlParsePI(htmlParserCtxtPtr ctxt) {
}
SKIP_BLANKS;
cur = CUR_CHAR(l);
- while (IS_CHAR(cur) && (cur != '>')) {
+ while ((cur != 0) && (cur != '>')) {
if (len + 5 >= size) {
xmlChar *tmp;
@@ -3245,7 +3244,13 @@ htmlParsePI(htmlParserCtxtPtr ctxt) {
GROW;
count = 0;
}
- COPY_BUF(l,buf,len,cur);
+ if (IS_CHAR(cur)) {
+ COPY_BUF(l,buf,len,cur);
+ } else {
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Invalid char in processing instruction "
+ "0x%X\n", cur);
+ }
NEXTL(l);
cur = CUR_CHAR(l);
if (cur == 0) {
@@ -3315,15 +3320,15 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
len = 0;
buf[len] = 0;
q = CUR_CHAR(ql);
- if (!IS_CHAR(q))
+ if (q == 0)
goto unfinished;
NEXTL(ql);
r = CUR_CHAR(rl);
- if (!IS_CHAR(r))
+ if (r == 0)
goto unfinished;
NEXTL(rl);
cur = CUR_CHAR(l);
- while (IS_CHAR(cur) &&
+ while ((cur != 0) &&
((cur != '>') ||
(r != '-') || (q != '-'))) {
if (len + 5 >= size) {
@@ -3339,7 +3344,12 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
}
buf = tmp;
}
- COPY_BUF(ql,buf,len,q);
+ if (IS_CHAR(q)) {
+ COPY_BUF(ql,buf,len,q);
+ } else {
+ htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+ "Invalid char in comment 0x%X\n", q);
+ }
q = r;
ql = rl;
r = cur;
@@ -3353,7 +3363,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
}
}
buf[len] = 0;
- if (IS_CHAR(cur)) {
+ if (cur == '>') {
NEXT;
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
(!ctxt->disableSAX))
@@ -3394,13 +3404,16 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) {
((NXT(2) == 'x') || NXT(2) == 'X')) {
SKIP(3);
while (CUR != ';') {
- if ((CUR >= '0') && (CUR <= '9'))
- val = val * 16 + (CUR - '0');
- else if ((CUR >= 'a') && (CUR <= 'f'))
- val = val * 16 + (CUR - 'a') + 10;
- else if ((CUR >= 'A') && (CUR <= 'F'))
- val = val * 16 + (CUR - 'A') + 10;
- else {
+ if ((CUR >= '0') && (CUR <= '9')) {
+ if (val < 0x110000)
+ val = val * 16 + (CUR - '0');
+ } else if ((CUR >= 'a') && (CUR <= 'f')) {
+ if (val < 0x110000)
+ val = val * 16 + (CUR - 'a') + 10;
+ } else if ((CUR >= 'A') && (CUR <= 'F')) {
+ if (val < 0x110000)
+ val = val * 16 + (CUR - 'A') + 10;
+ } else {
htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
"htmlParseCharRef: missing semicolon\n",
NULL, NULL);
@@ -3413,9 +3426,10 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) {
} else if ((CUR == '&') && (NXT(1) == '#')) {
SKIP(2);
while (CUR != ';') {
- if ((CUR >= '0') && (CUR <= '9'))
- val = val * 10 + (CUR - '0');
- else {
+ if ((CUR >= '0') && (CUR <= '9')) {
+ if (val < 0x110000)
+ val = val * 10 + (CUR - '0');
+ } else {
htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
"htmlParseCharRef: missing semicolon\n",
NULL, NULL);
@@ -3434,6 +3448,9 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) {
*/
if (IS_CHAR(val)) {
return(val);
+ } else if (val >= 0x110000) {
+ htmlParseErr(ctxt, XML_ERR_INVALID_CHAR,
+ "htmlParseCharRef: value too large\n", NULL, NULL);
} else {
htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
"htmlParseCharRef: invalid xmlChar value %d\n",
@@ -3493,9 +3510,12 @@ htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
if (CUR != '>') {
htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
"DOCTYPE improperly terminated\n", NULL, NULL);
- /* We shouldn't try to resynchronize ... */
+ /* Ignore bogus content */
+ while ((CUR != 0) && (CUR != '>'))
+ NEXT;
}
- NEXT;
+ if (CUR == '>')
+ NEXT;
/*
* Create or update the document accordingly to the DOCTYPE
@@ -3773,7 +3793,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
/* Dump the bogus tag like browsers do */
- while ((IS_CHAR_CH(CUR)) && (CUR != '>') &&
+ while ((CUR != 0) && (CUR != '>') &&
(ctxt->instate != XML_PARSER_EOF))
NEXT;
return -1;
@@ -3829,7 +3849,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
* (S Attribute)* S?
*/
SKIP_BLANKS;
- while ((IS_CHAR_CH(CUR)) &&
+ while ((CUR != 0) &&
(CUR != '>') &&
((CUR != '/') || (NXT(1) != '>'))) {
long cons = ctxt->nbChars;
@@ -3892,7 +3912,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
xmlFree(attvalue);
/* Dump the bogus attribute string up to the next blank or
* the end of the tag. */
- while ((IS_CHAR_CH(CUR)) &&
+ while ((CUR != 0) &&
!(IS_BLANK_CH(CUR)) && (CUR != '>') &&
((CUR != '/') || (NXT(1) != '>')))
NEXT;
@@ -3973,19 +3993,14 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
* We should definitely be at the ending "S? '>'" part
*/
SKIP_BLANKS;
- if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) {
+ if (CUR != '>') {
htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
"End tag : expected '>'\n", NULL, NULL);
- if (ctxt->recovery) {
- /*
- * We're not at the ending > !!
- * Error, unless in recover mode where we search forwards
- * until we find a >
- */
- while (CUR != '\0' && CUR != '>') NEXT;
- NEXT;
- }
- } else
+ /* Skip to next '>' */
+ while ((CUR != 0) && (CUR != '>'))
+ NEXT;
+ }
+ if (CUR == '>')
NEXT;
/*
@@ -4175,7 +4190,7 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
"htmlParseStartTag: invalid element name\n",
NULL, NULL);
/* Dump the bogus tag like browsers do */
- while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
+ while ((CUR != 0) && (CUR != '>'))
NEXT;
if (currentNode != NULL)
@@ -4390,7 +4405,7 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
*/
currentNode = xmlStrdup(ctxt->name);
depth = ctxt->nameNr;
- while (IS_CHAR_CH(CUR)) {
+ while (CUR != 0) {
oldptr = ctxt->input->cur;
htmlParseContent(ctxt);
if (oldptr==ctxt->input->cur) break;
@@ -4407,7 +4422,7 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
node_info.node = ctxt->node;
xmlParserAddNodeInfo(ctxt, &node_info);
}
- if (!IS_CHAR_CH(CUR)) {
+ if (CUR == 0) {
htmlAutoCloseOnEnd(ctxt);
}
@@ -4428,7 +4443,7 @@ htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
htmlNodeInfoPop(ctxt);
}
- if (!IS_CHAR_CH(CUR)) {
+ if (CUR == 0) {
htmlAutoCloseOnEnd(ctxt);
}
}
@@ -4577,7 +4592,7 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
"htmlParseStartTag: invalid element name\n",
NULL, NULL);
/* Dump the bogus tag like browsers do */
- while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
+ while ((CUR == 0) && (CUR != '>'))
NEXT;
htmlParserFinishElementParsing(ctxt);
@@ -5113,7 +5128,7 @@ htmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) {
* @first: the first char to lookup
* @next: the next char to lookup or zero
* @third: the next char to lookup or zero
- * @comment: flag to force checking inside comments
+ * @ignoreattrval: skip over attribute values
*
* Try to find if a sequence (first, next, third) or just (first next) or
* (first) is available in the input stream.
@@ -5127,13 +5142,11 @@ htmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) {
*/
static int
htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
- xmlChar next, xmlChar third, int iscomment,
- int ignoreattrval)
+ xmlChar next, xmlChar third, int ignoreattrval)
{
int base, len;
htmlParserInputPtr in;
const xmlChar *buf;
- int incomment = 0;
int invalue = 0;
char valdellim = 0x0;
@@ -5145,8 +5158,11 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
if (base < 0)
return (-1);
- if (ctxt->checkIndex > base)
+ if (ctxt->checkIndex > base) {
base = ctxt->checkIndex;
+ /* Abuse hasPErefs member to restore current state. */
+ invalue = ctxt->hasPErefs & 1 ? 1 : 0;
+ }
if (in->buf == NULL) {
buf = in->base;
@@ -5162,14 +5178,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
else if (next)
len--;
for (; base < len; base++) {
- if ((!incomment) && (base + 4 < len) && (!iscomment)) {
- if ((buf[base] == '<') && (buf[base + 1] == '!') &&
- (buf[base + 2] == '-') && (buf[base + 3] == '-')) {
- incomment = 1;
- /* do not increment past <! - some people use <!--> */
- base += 2;
- }
- }
if (ignoreattrval) {
if (buf[base] == '"' || buf[base] == '\'') {
if (invalue) {
@@ -5186,16 +5194,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
continue;
}
}
- if (incomment) {
- if (base + 3 > len)
- return (-1);
- if ((buf[base] == '-') && (buf[base + 1] == '-') &&
- (buf[base + 2] == '>')) {
- incomment = 0;
- base += 2;
- }
- continue;
- }
if (buf[base] == first) {
if (third != 0) {
if ((buf[base + 1] != next) || (buf[base + 2] != third))
@@ -5222,8 +5220,12 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
return (base - (in->cur - in->base));
}
}
- if ((!incomment) && (!invalue))
- ctxt->checkIndex = base;
+ ctxt->checkIndex = base;
+ /* Abuse hasPErefs member to track current state. */
+ if (invalue)
+ ctxt->hasPErefs |= 1;
+ else
+ ctxt->hasPErefs &= ~1;
#ifdef DEBUG_PUSH
if (next == 0)
xmlGenericError(xmlGenericErrorContext,
@@ -5240,80 +5242,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
}
/**
- * htmlParseLookupChars:
- * @ctxt: an HTML parser context
- * @stop: Array of chars, which stop the lookup.
- * @stopLen: Length of stop-Array
- *
- * Try to find if any char of the stop-Array is available in the input
- * stream.
- * This function has a side effect of (possibly) incrementing ctxt->checkIndex
- * to avoid rescanning sequences of bytes, it DOES change the state of the
- * parser, do not use liberally.
- *
- * Returns the index to the current parsing point if a stopChar
- * is available, -1 otherwise.
- */
-static int
-htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
- int stopLen)
-{
- int base, len;
- htmlParserInputPtr in;
- const xmlChar *buf;
- int incomment = 0;
- int i;
-
- in = ctxt->input;
- if (in == NULL)
- return (-1);
-
- base = in->cur - in->base;
- if (base < 0)
- return (-1);
-
- if (ctxt->checkIndex > base)
- base = ctxt->checkIndex;
-
- if (in->buf == NULL) {
- buf = in->base;
- len = in->length;
- } else {
- buf = xmlBufContent(in->buf->buffer);
- len = xmlBufUse(in->buf->buffer);
- }
-
- for (; base < len; base++) {
- if (!incomment && (base + 4 < len)) {
- if ((buf[base] == '<') && (buf[base + 1] == '!') &&
- (buf[base + 2] == '-') && (buf[base + 3] == '-')) {
- incomment = 1;
- /* do not increment past <! - some people use <!--> */
- base += 2;
- }
- }
- if (incomment) {
- if (base + 3 > len)
- return (-1);
- if ((buf[base] == '-') && (buf[base + 1] == '-') &&
- (buf[base + 2] == '>')) {
- incomment = 0;
- base += 2;
- }
- continue;
- }
- for (i = 0; i < stopLen; ++i) {
- if (buf[base] == stop[i]) {
- ctxt->checkIndex = 0;
- return (base - (in->cur - in->base));
- }
- }
- }
- ctxt->checkIndex = base;
- return (-1);
-}
-
-/**
* htmlParseTryOrFinish:
* @ctxt: an HTML parser context
* @terminate: last chunk indicator
@@ -5326,7 +5254,7 @@ static int
htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
int ret = 0;
htmlParserInputPtr in;
- int avail = 0;
+ ptrdiff_t avail = 0;
xmlChar cur, next;
htmlParserNodeInfo node_info;
@@ -5391,7 +5319,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
+ (in->cur - in->base);
if ((avail == 0) && (terminate)) {
htmlAutoCloseOnEnd(ctxt);
if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
@@ -5405,6 +5334,12 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
}
if (avail < 1)
goto done;
+ /*
+ * This is done to make progress and avoid an infinite loop
+ * if a parsing attempt was aborted by hitting a NUL byte. After
+ * changing htmlCurrentChar, this probably isn't necessary anymore.
+ * We should consider removing this check.
+ */
cur = in->cur[0];
if (cur == 0) {
SKIP(1);
@@ -5427,7 +5362,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
+ (in->cur - in->base);
}
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData,
@@ -5444,7 +5380,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5469,14 +5405,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
+ (in->cur - in->base);
/*
* no chars in buffer
*/
if (avail < 1)
goto done;
/*
- * not enouth chars in buffer
+ * not enough chars in buffer
*/
if (avail < 2) {
if (!terminate)
@@ -5490,7 +5427,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5500,7 +5437,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
ctxt->instate = XML_PARSER_MISC;
} else if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5514,7 +5451,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5530,7 +5467,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
(avail < 9)) {
goto done;
} else {
- ctxt->instate = XML_PARSER_START_TAG;
+ ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"HPP: entering START_TAG\n");
@@ -5542,7 +5479,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
+ (in->cur - in->base);
if (avail < 2)
goto done;
cur = in->cur[0];
@@ -5550,7 +5488,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5560,7 +5498,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
ctxt->instate = XML_PARSER_PROLOG;
} else if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5572,7 +5510,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
(avail < 4)) {
goto done;
} else {
- ctxt->instate = XML_PARSER_START_TAG;
+ ctxt->instate = XML_PARSER_CONTENT;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"HPP: entering START_TAG\n");
@@ -5583,7 +5521,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
+ (in->cur - in->base);
if (avail < 1)
goto done;
cur = in->cur[0];
@@ -5597,7 +5536,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5607,7 +5546,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
ctxt->instate = XML_PARSER_EPILOG;
} else if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5642,7 +5581,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (avail < 1)
goto done;
/*
- * not enouth chars in buffer
+ * not enough chars in buffer
*/
if (avail < 2) {
if (!terminate)
@@ -5671,7 +5610,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
break;
}
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
goto done;
/* Capture start position */
@@ -5762,13 +5701,13 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
break;
}
case XML_PARSER_CONTENT: {
+ xmlChar chr[2] = { 0, 0 };
long cons;
+
/*
* Handle preparsed entities and charRef
*/
if (ctxt->token != 0) {
- xmlChar chr[2] = { 0 , 0 } ;
-
chr[0] = (xmlChar) ctxt->token;
htmlCheckParagraph(ctxt);
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
@@ -5780,21 +5719,22 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
cur = in->cur[0];
if ((cur != '<') && (cur != '&')) {
if (ctxt->sax != NULL) {
+ chr[0] = cur;
if (IS_BLANK_CH(cur)) {
if (ctxt->keepBlanks) {
if (ctxt->sax->characters != NULL)
ctxt->sax->characters(
- ctxt->userData, &in->cur[0], 1);
+ ctxt->userData, chr, 1);
} else {
if (ctxt->sax->ignorableWhitespace != NULL)
ctxt->sax->ignorableWhitespace(
- ctxt->userData, &in->cur[0], 1);
+ ctxt->userData, chr, 1);
}
} else {
htmlCheckParagraph(ctxt);
if (ctxt->sax->characters != NULL)
ctxt->sax->characters(
- ctxt->userData, &in->cur[0], 1);
+ ctxt->userData, chr, 1);
}
}
ctxt->token = 0;
@@ -5817,7 +5757,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
int idx;
xmlChar val;
- idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0);
+ idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0);
if (idx < 0)
goto done;
val = in->cur[idx + 2];
@@ -5844,7 +5784,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
goto done;
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"Misplaced DOCTYPE declaration\n",
@@ -5854,7 +5794,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
(htmlParseLookupSequence(
- ctxt, '-', '-', '>', 1, 1) < 0))
+ ctxt, '-', '-', '>', 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5864,7 +5804,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
ctxt->instate = XML_PARSER_CONTENT;
} else if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@@ -5883,24 +5823,35 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
#endif
break;
} else if (cur == '<') {
- ctxt->instate = XML_PARSER_START_TAG;
- ctxt->checkIndex = 0;
+ if ((!terminate) && (next == 0))
+ goto done;
+ /*
+ * Only switch to START_TAG if the next character
+ * starts a valid name. Otherwise, htmlParseStartTag
+ * might return without consuming all characters
+ * up to the final '>'.
+ */
+ if ((IS_ASCII_LETTER(next)) ||
+ (next == '_') || (next == ':') || (next == '.')) {
+ ctxt->instate = XML_PARSER_START_TAG;
+ ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
- xmlGenericError(xmlGenericErrorContext,
- "HPP: entering START_TAG\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "HPP: entering START_TAG\n");
#endif
+ } else {
+ htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
+ "htmlParseTryOrFinish: "
+ "invalid element name\n",
+ NULL, NULL);
+ htmlCheckParagraph(ctxt);
+ if ((ctxt->sax != NULL) &&
+ (ctxt->sax->characters != NULL))
+ ctxt->sax->characters(ctxt->userData,
+ in->cur, 1);
+ NEXT;
+ }
break;
- } else if (cur == '&') {
- if ((!terminate) &&
- (htmlParseLookupChars(ctxt,
- BAD_CAST "; >/", 4) < 0))
- goto done;
-#ifdef DEBUG_PUSH
- xmlGenericError(xmlGenericErrorContext,
- "HPP: Parsing Reference\n");
-#endif
- /* TODO: check generation of subtrees if noent !!! */
- htmlParseReference(ctxt);
} else {
/*
* check that the text sequence is complete
@@ -5909,14 +5860,21 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
* data detection.
*/
if ((!terminate) &&
- (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0))
+ (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
goto done;
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"HPP: Parsing char data\n");
#endif
- htmlParseCharData(ctxt);
+ while ((cur != '<') && (cur != 0)) {
+ if (cur == '&') {
+ htmlParseReference(ctxt);
+ } else {
+ htmlParseCharData(ctxt);
+ }
+ cur = in->cur[0];
+ }
}
}
if (cons == ctxt->nbChars) {
@@ -5935,7 +5893,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (avail < 2)
goto done;
if ((!terminate) &&
- (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
+ (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
htmlParseEndTag(ctxt);
if (ctxt->nameNr == 0) {
@@ -6117,12 +6075,12 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
int res;
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
+ xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
if (res < 0) {
ctxt->errNo = XML_PARSER_EOF;
ctxt->disableSAX = 1;
return (XML_PARSER_EOF);
}
- xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
#endif
@@ -6141,12 +6099,12 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
size_t current = ctxt->input->cur - ctxt->input->base;
nbchars = xmlCharEncInput(in, terminate);
+ xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
if (nbchars < 0) {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"encoder error\n", NULL, NULL);
return(XML_ERR_INVALID_ENCODING);
}
- xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
}
}
}