aboutsummaryrefslogtreecommitdiffstats
path: root/xmlregexp.c
diff options
context:
space:
mode:
authorDaniel Veillard <veillard@src.gnome.org>2007-08-22 16:29:21 +0000
committerDaniel Veillard <veillard@src.gnome.org>2007-08-22 16:29:21 +0000
commit76d59b6d6ff8284ee6ed69ee4118e7da935d5e57 (patch)
tree7ff2d93d75b927c9071ce5cc020cfaf79c210c07 /xmlregexp.c
parent3dcd319a464b54f5492b28e57244c688c33c73ac (diff)
downloadandroid_external_libxml2-76d59b6d6ff8284ee6ed69ee4118e7da935d5e57.tar.gz
android_external_libxml2-76d59b6d6ff8284ee6ed69ee4118e7da935d5e57.tar.bz2
android_external_libxml2-76d59b6d6ff8284ee6ed69ee4118e7da935d5e57.zip
try to fix for the nth time the automata generation in case of complex
* xmlregexp.c: try to fix for the nth time the automata generation in case of complex ranges. I suppose that time it is actually okay Daniel svn path=/trunk/; revision=3650
Diffstat (limited to 'xmlregexp.c')
-rw-r--r--xmlregexp.c190
1 files changed, 153 insertions, 37 deletions
diff --git a/xmlregexp.c b/xmlregexp.c
index e729d574..7f8921b7 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -189,6 +189,7 @@ struct _xmlRegAtom {
int neg;
int codepoint;
xmlRegStatePtr start;
+ xmlRegStatePtr start0;
xmlRegStatePtr stop;
int maxRanges;
int nbRanges;
@@ -733,11 +734,41 @@ xmlRegFreeRange(xmlRegRangePtr range) {
}
/**
+ * xmlRegCopyRange:
+ * @range: the regexp range
+ *
+ * Copy a regexp range
+ *
+ * Returns the new copy or NULL in case of error.
+ */
+static xmlRegRangePtr
+xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
+ xmlRegRangePtr ret;
+
+ if (range == NULL)
+ return(NULL);
+
+ ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
+ range->end);
+ if (ret == NULL)
+ return(NULL);
+ if (range->blockName != NULL) {
+ ret->blockName = xmlStrdup(range->blockName);
+ if (ret->blockName == NULL) {
+ xmlRegexpErrMemory(ctxt, "allocating range");
+ xmlRegFreeRange(ret);
+ return(NULL);
+ }
+ }
+ return(ret);
+}
+
+/**
* xmlRegNewAtom:
* @ctxt: the regexp parser context
* @type: the type of atom
*
- * Allocate a new regexp range
+ * Allocate a new atom
*
* Returns the new atom or NULL in case of error
*/
@@ -784,6 +815,52 @@ xmlRegFreeAtom(xmlRegAtomPtr atom) {
xmlFree(atom);
}
+/**
+ * xmlRegCopyAtom:
+ * @ctxt: the regexp parser context
+ * @atom: the oiginal atom
+ *
+ * Allocate a new regexp range
+ *
+ * Returns the new atom or NULL in case of error
+ */
+static xmlRegAtomPtr
+xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
+ xmlRegAtomPtr ret;
+
+ ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
+ if (ret == NULL) {
+ xmlRegexpErrMemory(ctxt, "copying atom");
+ return(NULL);
+ }
+ memset(ret, 0, sizeof(xmlRegAtom));
+ ret->type = atom->type;
+ ret->quant = atom->quant;
+ ret->min = atom->min;
+ ret->max = atom->max;
+ if (atom->nbRanges > 0) {
+ int i;
+
+ ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
+ atom->nbRanges);
+ if (ret->ranges == NULL) {
+ xmlRegexpErrMemory(ctxt, "copying atom");
+ goto error;
+ }
+ for (i = 0;i < atom->nbRanges;i++) {
+ ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
+ if (ret->ranges[i] == NULL)
+ goto error;
+ ret->nbRanges = i + 1;
+ }
+ }
+ return(ret);
+
+error:
+ xmlRegFreeAtom(ret);
+ return(NULL);
+}
+
static xmlRegStatePtr
xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
xmlRegStatePtr ret;
@@ -1504,52 +1581,84 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
break;
case XML_REGEXP_QUANT_RANGE: {
int counter;
- xmlRegStatePtr newstate;
+ xmlRegStatePtr inter, newstate;
/*
- * This one is nasty:
- * 1/ if range has minOccurs == 0, create a new state
- * and create epsilon transitions from atom->start
- * to atom->stop, as well as atom->start to the new
- * state
- * 2/ register a new counter
- * 3/ register an epsilon transition associated to
- * this counter going from atom->stop to atom->start
- * 4/ create a new state
- * 5/ generate a counted transition from atom->stop to
- * that state
+ * create the final state now if needed
*/
- if (atom->min == 0) {
- xmlFAGenerateEpsilonTransition(ctxt, atom->start,
- atom->stop);
+ if (to != NULL) {
+ newstate = to;
+ } else {
newstate = xmlRegNewState(ctxt);
xmlRegStatePush(ctxt, newstate);
- ctxt->state = newstate;
+ }
+
+ /*
+ * The principle here is to use counted transition
+ * to avoid explosion in the number of states in the
+ * graph. This is clearly more complex but should not
+ * be exploitable at runtime.
+ */
+ if ((atom->min == 0) && (atom->start0 == NULL)) {
+ xmlRegAtomPtr copy;
+ /*
+ * duplicate a transition based on atom to count next
+ * occurences after 1. We cannot loop to atom->start
+ * directly because we need an epsilon transition to
+ * newstate.
+ */
+ /* ???? For some reason it seems we never reach that
+ case, I suppose this got optimized out before when
+ building the automata */
+
+ if (copy == NULL)
+ return(-1);
+ copy = xmlRegCopyAtom(ctxt, atom);
+ copy->quant = XML_REGEXP_QUANT_ONCE;
+ copy->min = 0;
+ copy->max = 0;
+
+ if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
+ < 0)
+ return(-1);
+ inter = ctxt->state;
+ counter = xmlRegGetCounter(ctxt);
+ ctxt->counters[counter].min = atom->min - 1;
+ ctxt->counters[counter].max = atom->max - 1;
+ /* count the number of times we see it again */
+ xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
+ atom->stop, counter);
+ /* allow a way out based on the count */
+ xmlFAGenerateCountedTransition(ctxt, inter,
+ newstate, counter);
+ /* and also allow a direct exit for 0 */
xmlFAGenerateEpsilonTransition(ctxt, atom->start,
- newstate);
+ newstate);
+ } else {
+ /*
+ * either we need the atom at least once or there
+ * is an atom->start0 allowing to easilly plug the
+ * epsilon transition.
+ */
+ counter = xmlRegGetCounter(ctxt);
+ ctxt->counters[counter].min = atom->min - 1;
+ ctxt->counters[counter].max = atom->max - 1;
+ /* count the number of times we see it again */
+ xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
+ atom->start, counter);
+ /* allow a way out based on the count */
+ xmlFAGenerateCountedTransition(ctxt, atom->stop,
+ newstate, counter);
+ /* and if needed allow a direct exit for 0 */
+ if (atom->min == 0)
+ xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
+ newstate);
+
}
- counter = xmlRegGetCounter(ctxt);
- ctxt->counters[counter].min = atom->min - 1;
- ctxt->counters[counter].max = atom->max - 1;
atom->min = 0;
atom->max = 0;
atom->quant = XML_REGEXP_QUANT_ONCE;
- if (to != NULL) {
- newstate = to;
- } else {
- newstate = xmlRegNewState(ctxt);
- xmlRegStatePush(ctxt, newstate);
- }
ctxt->state = newstate;
- xmlFAGenerateCountedTransition(ctxt, atom->stop,
- newstate, counter);
-
- /*
- * first check count and if OK jump forward;
- * if checking fail increment count and jump back
- */
- xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
- atom->start, counter);
}
default:
break;
@@ -5113,9 +5222,15 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
} else if (CUR == ')') {
return(0);
} else if (CUR == '(') {
- xmlRegStatePtr start, oldend;
+ xmlRegStatePtr start, oldend, start0;
NEXT;
+ /*
+ * this extra Epsilon transition is needed if we count with 0 allowed
+ * unfortunately this can't be known at that point
+ */
+ xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
+ start0 = ctxt->state;
xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
start = ctxt->state;
oldend = ctxt->end;
@@ -5131,6 +5246,7 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
if (ctxt->atom == NULL)
return(-1);
ctxt->atom->start = start;
+ ctxt->atom->start0 = start0;
ctxt->atom->stop = ctxt->state;
ctxt->end = oldend;
return(1);