aboutsummaryrefslogtreecommitdiffstats
path: root/xmlreader.c
diff options
context:
space:
mode:
author Daniel Veillard <veillard@src.gnome.org> 2003-04-21 21:36:41 +0000
committer Daniel Veillard <veillard@src.gnome.org> 2003-04-21 21:36:41 +0000
commit a880b124750afcce94ea3d947125d0f8a034e766 (patch)
tree 71e12d33ccb7f395c669db50cc05dd2edf6b6508 /xmlreader.c
parent 83c8a5cd3109c4625e2dacdf07e0634347e25b71 (diff)
download android_external_libxml2-a880b124750afcce94ea3d947125d0f8a034e766.tar.gz
android_external_libxml2-a880b124750afcce94ea3d947125d0f8a034e766.tar.bz2
android_external_libxml2-a880b124750afcce94ea3d947125d0f8a034e766.zip
a lot of performance work especially the speed of streaming through the reader and push interface
* globals.c libxml.h parser.c parserInternals.c tree.c xmllint.c xmlreader.c include/libxml/parser.h: a lot of performance work especially the speed of streaming through the reader and push interface. Some thread related optimizations. Nearly doubled the speed of parsing through the reader. Daniel
Diffstat (limited to 'xmlreader.c')
-rw-r--r-- xmlreader.c 110
1 files changed, 37 insertions, 73 deletions
diff --git a/xmlreader.c b/xmlreader.c
index 6cdadc97..9a24110c 100644
--- a/xmlreader.c
+++ b/xmlreader.c
@@ -56,6 +56,7 @@
#define DUMP_READER
#endif
+#define CHUNK_SIZE 512
/************************************************************************
* *
* The parser: maps the Text Reader API on top of the existing *
@@ -340,9 +341,8 @@ xmlTextReaderCDataBlock(void *ctx, const xmlChar *ch, int len)
*/
static int
xmlTextReaderPushData(xmlTextReaderPtr reader) {
- unsigned int cur = reader->cur;
xmlBufferPtr inbuf;
- int val;
+ int val, s;
int oldstate;
if ((reader->input == NULL) || (reader->input->buffer == NULL))
@@ -351,8 +351,9 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
oldstate = reader->state;
reader->state = XML_TEXTREADER_NONE;
inbuf = reader->input->buffer;
+
while (reader->state == XML_TEXTREADER_NONE) {
- if (cur >= inbuf->use) {
+ if (inbuf->use < reader->cur + CHUNK_SIZE) {
/*
* Refill the buffer unless we are at the end of the stream
*/
@@ -365,47 +366,39 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
(reader->ctxt->myDoc != NULL))
return(val);
}
+
} else
break;
}
/*
- * parse by block of 512 bytes
+ * parse by block of CHUNK_SIZE bytes, various tests show that
+ * it's the best tradeoff at least on a 1.2GH Duron
*/
- if ((cur >= reader->cur + 512) || (cur >= inbuf->use)) {
- if (cur < inbuf->use)
- cur = cur + 1;
+ if (inbuf->use >= reader->cur + CHUNK_SIZE) {
val = xmlParseChunk(reader->ctxt,
(const char *) &inbuf->content[reader->cur],
- cur - reader->cur, 0);
+ CHUNK_SIZE, 0);
+ reader->cur += CHUNK_SIZE;
if (val != 0)
return(-1);
- reader->cur = cur;
- break;
} else {
- cur = cur + 1;
-
- /*
- * One may have to force a flush at some point when parsing really
- * large CDATA sections
- */
- if ((cur - reader->cur > 4096) && (reader->base == 0) &&
- (reader->mode == XML_TEXTREADER_MODE_INTERACTIVE)) {
- cur = cur + 1;
- val = xmlParseChunk(reader->ctxt,
- (const char *) &inbuf->content[reader->cur],
- cur - reader->cur, 0);
- if (val != 0)
- return(-1);
- reader->cur = cur;
- }
+ s = inbuf->use - reader->cur;
+ val = xmlParseChunk(reader->ctxt,
+ (const char *) &inbuf->content[reader->cur],
+ s, 0);
+ reader->cur += s;
+ if (val != 0)
+ return(-1);
+ break;
}
}
+
/*
* Discard the consumed input when needed and possible
*/
if (reader->mode == XML_TEXTREADER_MODE_INTERACTIVE) {
- if ((reader->cur >= 4096) && (reader->base == 0)) {
- val = xmlBufferShrink(inbuf, cur);
+ if (reader->cur >= 4096) {
+ val = xmlBufferShrink(inbuf, reader->cur);
if (val >= 0) {
reader->cur -= val;
}
@@ -416,12 +409,13 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
* At the end of the stream signal that the work is done to the Push
* parser.
*/
- if (reader->mode == XML_TEXTREADER_MODE_EOF) {
+ else if (reader->mode == XML_TEXTREADER_MODE_EOF) {
if (reader->mode != XML_TEXTREADER_DONE) {
+ s = inbuf->use - reader->cur;
val = xmlParseChunk(reader->ctxt,
(const char *) &inbuf->content[reader->cur],
- cur - reader->cur, 1);
- reader->cur = cur;
+ s, 1);
+ reader->cur = inbuf->use;
reader->mode = XML_TEXTREADER_DONE;
}
}
@@ -767,17 +761,17 @@ get_next_node:
* that the parser didn't finished or that we arent at the end
* of stream, continue processing.
*/
- while (((oldstate == XML_TEXTREADER_BACKTRACK) ||
+ while ((reader->node->next == NULL) &&
+ (reader->ctxt->nodeNr == olddepth) &&
+ ((oldstate == XML_TEXTREADER_BACKTRACK) ||
(reader->node->children == NULL) ||
(reader->node->type == XML_ENTITY_REF_NODE) ||
(reader->node->type == XML_DTD_NODE) ||
(reader->node->type == XML_DOCUMENT_NODE) ||
(reader->node->type == XML_HTML_DOCUMENT_NODE)) &&
- (reader->node->next == NULL) &&
((reader->ctxt->node == NULL) ||
(reader->ctxt->node == reader->node) ||
(reader->ctxt->node == reader->node->parent)) &&
- (reader->ctxt->nodeNr == olddepth) &&
(reader->ctxt->instate != XML_PARSER_EOF)) {
val = xmlTextReaderPushData(reader);
if (val < 0)
@@ -785,45 +779,6 @@ get_next_node:
if (reader->node == NULL)
goto node_end;
}
- /*
- * If we are in the middle of a piece of CDATA make sure it's finished
- * Maybe calling a function checking that a non-character() callback was
- * received would be cleaner for the loop exit.
- */
- if ((oldstate == XML_TEXTREADER_ELEMENT) &&
- (reader->ctxt->instate == XML_PARSER_CDATA_SECTION)) {
- while ((reader->ctxt->instate == XML_PARSER_CDATA_SECTION) &&
- (((reader->node->content == NULL) &&
- (reader->node->next != NULL) &&
- (reader->node->next->type == XML_CDATA_SECTION_NODE) &&
- (reader->node->next->next == NULL) &&
- (reader->node->parent->next == NULL)) ||
- ((reader->node->children != NULL) &&
- (reader->node->children->type == XML_CDATA_SECTION_NODE) &&
- (reader->node->children->next == NULL) &&
- (reader->node->children->next == NULL)))) {
- val = xmlTextReaderPushData(reader);
- if (val < 0)
- return(-1);
- }
- }
- if ((oldstate == XML_TEXTREADER_ELEMENT) &&
- (reader->ctxt->instate == XML_PARSER_CONTENT)) {
- while ((reader->ctxt->instate == XML_PARSER_CONTENT) &&
- (((reader->node->content == NULL) &&
- (reader->node->next != NULL) &&
- (reader->node->next->type == XML_TEXT_NODE) &&
- (reader->node->next->next == NULL) &&
- (reader->node->parent->next == NULL)) ||
- ((reader->node->children != NULL) &&
- (reader->node->children->type == XML_TEXT_NODE) &&
- (reader->node->children->next == NULL) &&
- (reader->node->children->next == NULL)))) {
- val = xmlTextReaderPushData(reader);
- if (val < 0)
- return(-1);
- }
- }
if (oldstate != XML_TEXTREADER_BACKTRACK) {
if ((reader->node->children != NULL) &&
(reader->node->type != XML_ENTITY_REF_NODE) &&
@@ -900,6 +855,15 @@ node_found:
DUMP_READER
/*
+ * If we are in the middle of a piece of CDATA make sure it's finished
+ */
+ if ((reader->node != NULL) &&
+ ((reader->node->type == XML_TEXT_NODE) ||
+ (reader->node->type == XML_CDATA_SECTION_NODE))) {
+ xmlTextReaderExpand(reader);
+ }
+
+ /*
* Handle entities enter and exit when in entity replacement mode
*/
if ((reader->node != NULL) &&