diff options
author | Xin Li <delphij@google.com> | 2016-07-26 14:22:54 -0700 |
---|---|---|
committer | Xin Li <delphij@google.com> | 2016-08-15 22:30:45 +0000 |
commit | a136fc2e5a8893a02b0912862d26a575fcbd641a (patch) | |
tree | 341dc6790424526b424173dc7bc5a1165db7de10 /parserInternals.c | |
parent | edb5870767fed8712a9b77ef34097209b61ab2db (diff) | |
download | android_external_libxml2-a136fc2e5a8893a02b0912862d26a575fcbd641a.tar.gz android_external_libxml2-a136fc2e5a8893a02b0912862d26a575fcbd641a.tar.bz2 android_external_libxml2-a136fc2e5a8893a02b0912862d26a575fcbd641a.zip |
Merge remote-tracking branch 'goog/upstream-master' into mymerge
BUG: 29834751
Change-Id: I88fc1d4f86bcbd0ac0fe9acdbe764f3d738c5f32
(cherry picked from commit e3d78e1fe0669e9c7083a4de19f1e06171849b28)
Diffstat (limited to 'parserInternals.c')
-rw-r--r-- | parserInternals.c | 178 |
1 files changed, 92 insertions, 86 deletions
diff --git a/parserInternals.c b/parserInternals.c index 2b8646c2..bfc778ac 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -55,6 +55,10 @@ #include <libxml/globals.h> #include <libxml/chvalid.h> +#define CUR(ctxt) ctxt->input->cur +#define END(ctxt) ctxt->input->end +#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt)) + #include "buf.h" #include "enc.h" @@ -165,7 +169,7 @@ __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, * * Handle an internal error */ -static void +static void LIBXML_ATTR_FORMAT(2,0) xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) { if ((ctxt != NULL) && (ctxt->disableSAX != 0) && @@ -193,7 +197,7 @@ xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) * * n encoding error */ -static void +static void LIBXML_ATTR_FORMAT(3,0) xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *msg, int val) { @@ -294,7 +298,7 @@ xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUS */ int xmlParserInputGrow(xmlParserInputPtr in, int len) { - size_t ret; + int ret; size_t indx; const xmlChar *content; @@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt) (ctxt->input == NULL)) return; - if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { - if ((*ctxt->input->cur == 0) && - (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && - (ctxt->instate != XML_PARSER_COMMENT)) { - /* - * If we are at the end of the current entity and - * the context allows it, we pop consumed entities - * automatically. - * the auto closing should be blocked in other cases - */ + if (!(VALID_CTXT(ctxt))) { + xmlErrInternal(ctxt, "Parser input data memory error\n", NULL); + ctxt->errNo = XML_ERR_INTERNAL_ERROR; + xmlStopParser(ctxt); + return; + } + + if ((*ctxt->input->cur == 0) && + (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { + if ((ctxt->instate != XML_PARSER_COMMENT)) xmlPopInput(ctxt); - } else { - const unsigned char *cur; - unsigned char c; + return; + } - /* - * 2.11 End-of-Line Handling - * the literal two-character sequence "#xD#xA" or a standalone - * literal #xD, an XML processor must pass to the application - * the single character #xA. - */ - if (*(ctxt->input->cur) == '\n') { - ctxt->input->line++; ctxt->input->col = 1; - } else - ctxt->input->col++; + if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { + const unsigned char *cur; + unsigned char c; - /* - * We are supposed to handle UTF8, check it's valid - * From rfc2044: encoding of the Unicode values on UTF-8: - * - * UCS-4 range (hex.) UTF-8 octet sequence (binary) - * 0000 0000-0000 007F 0xxxxxxx - * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx - * - * Check for the 0x110000 limit too - */ - cur = ctxt->input->cur; + /* + * 2.11 End-of-Line Handling + * the literal two-character sequence "#xD#xA" or a standalone + * literal #xD, an XML processor must pass to the application + * the single character #xA. + */ + if (*(ctxt->input->cur) == '\n') { + ctxt->input->line++; ctxt->input->col = 1; + } else + ctxt->input->col++; - c = *cur; - if (c & 0x80) { - if (c == 0xC0) - goto encoding_error; - if (cur[1] == 0) { + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + cur = ctxt->input->cur; + + c = *cur; + if (c & 0x80) { + if (c == 0xC0) + goto encoding_error; + if (cur[1] == 0) { + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } + if ((cur[1] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xe0) == 0xe0) { + unsigned int val; + + if (cur[2] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); cur = ctxt->input->cur; } - if ((cur[1] & 0xc0) != 0x80) + if ((cur[2] & 0xc0) != 0x80) goto encoding_error; - if ((c & 0xe0) == 0xe0) { - unsigned int val; - - if (cur[2] == 0) { + if ((c & 0xf0) == 0xf0) { + if (cur[3] == 0) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); cur = ctxt->input->cur; } - if ((cur[2] & 0xc0) != 0x80) + if (((c & 0xf8) != 0xf0) || + ((cur[3] & 0xc0) != 0x80)) goto encoding_error; - if ((c & 0xf0) == 0xf0) { - if (cur[3] == 0) { - xmlParserInputGrow(ctxt->input, INPUT_CHUNK); - cur = ctxt->input->cur; - } - if (((c & 0xf8) != 0xf0) || - ((cur[3] & 0xc0) != 0x80)) - goto encoding_error; - /* 4-byte code */ - ctxt->input->cur += 4; - val = (cur[0] & 0x7) << 18; - val |= (cur[1] & 0x3f) << 12; - val |= (cur[2] & 0x3f) << 6; - val |= cur[3] & 0x3f; - } else { - /* 3-byte code */ - ctxt->input->cur += 3; - val = (cur[0] & 0xf) << 12; - val |= (cur[1] & 0x3f) << 6; - val |= cur[2] & 0x3f; - } - if (((val > 0xd7ff) && (val < 0xe000)) || - ((val > 0xfffd) && (val < 0x10000)) || - (val >= 0x110000)) { - xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, - "Char 0x%X out of allowed range\n", - val); - } - } else - /* 2-byte code */ - ctxt->input->cur += 2; + /* 4-byte code */ + ctxt->input->cur += 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + } else { + /* 3-byte code */ + ctxt->input->cur += 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + } + if (((val > 0xd7ff) && (val < 0xe000)) || + ((val > 0xfffd) && (val < 0x10000)) || + (val >= 0x110000)) { + xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, + "Char 0x%X out of allowed range\n", + val); + } } else - /* 1-byte code */ - ctxt->input->cur++; + /* 2-byte code */ + ctxt->input->cur += 2; + } else + /* 1-byte code */ + ctxt->input->cur++; - ctxt->nbChars++; - if (*ctxt->input->cur == 0) - xmlParserInputGrow(ctxt->input, INPUT_CHUNK); - } + ctxt->nbChars++; + if (*ctxt->input->cur == 0) + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); } else { /* * Assume it's a fixed length encoding (1) with |