diff options
author | Daniel Veillard <veillard@src.gnome.org> | 2006-03-21 23:17:57 +0000 |
---|---|---|
committer | Daniel Veillard <veillard@src.gnome.org> | 2006-03-21 23:17:57 +0000 |
commit | 54eb0243c442292953b4a3df39568735039ebc82 (patch) | |
tree | e0cbf73653c5d83a9d6148eba287f25537b213af /xmlregexp.c | |
parent | aac7c68e87d00732b319698723de1ec43252fb01 (diff) | |
download | android_external_libxml2-54eb0243c442292953b4a3df39568735039ebc82.tar.gz android_external_libxml2-54eb0243c442292953b4a3df39568735039ebc82.tar.bz2 android_external_libxml2-54eb0243c442292953b4a3df39568735039ebc82.zip |
applied patch from Youri Golovanov fixing bug #316338 and adding a couple
* xmlregexp.c: applied patch from Youri Golovanov fixing bug
#316338 and adding a couple of optimizations in the regexp
compilation engine.
* test/regexp/bug316338 result/regexp/bug316338: added regression
tests based on the examples provided in the bug report.
Daniel
Diffstat (limited to 'xmlregexp.c')
-rw-r--r-- | xmlregexp.c | 52 |
1 files changed, 34 insertions, 18 deletions
diff --git a/xmlregexp.c b/xmlregexp.c index 8d7ee97f..58f480da 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -1479,7 +1479,13 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, switch (atom->quant) { case XML_REGEXP_QUANT_OPT: atom->quant = XML_REGEXP_QUANT_ONCE; - xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); + /* + * transition done to the state after end of atom. + * 1. set transition from atom start to new state + * 2. set transition from atom end to this state. + */ + xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0); + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, ctxt->state); break; case XML_REGEXP_QUANT_MULT: atom->quant = XML_REGEXP_QUANT_ONCE; @@ -1522,8 +1528,6 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, atom->min = 0; atom->max = 0; atom->quant = XML_REGEXP_QUANT_ONCE; - xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, - atom->start, counter); if (to != NULL) { newstate = to; } else { @@ -1533,6 +1537,13 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, ctxt->state = newstate; xmlFAGenerateCountedTransition(ctxt, atom->stop, newstate, counter); + + /* + * first check count and if OK jump forward; + * if checking fail increment count and jump back + */ + xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, + atom->start, counter); } default: break; @@ -4299,6 +4310,15 @@ xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) { if (exec->state->nbTrans > exec->transno + 1) { xmlFARegExecSave(exec); } + /* + * restart count for expressions like this ((abc){2})* + */ + if (trans->count >= 0) { +#ifdef DEBUG_REGEXP_EXEC + printf("Reset count %d\n", trans->count); +#endif + exec->counts[trans->count] = 0; + } if (trans->counter >= 0) { #ifdef DEBUG_REGEXP_EXEC printf("Increasing count %d\n", trans->counter); @@ -5112,19 +5132,23 @@ xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) { /** * xmlFAParseBranch: * @ctxt: a regexp parser context + * @to: optional target to the end of the branch + * + * @to is used to optimize by removing duplicate path in automata + * in expressions like (a|b)(c|d) * * [2] branch ::= piece* - 8 */ static int -xmlFAParseBranch(xmlRegParserCtxtPtr ctxt) { +xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) { xmlRegStatePtr previous; int ret; previous = ctxt->state; ret = xmlFAParsePiece(ctxt); if (ret != 0) { - if (xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom) < 0) + if (xmlFAGenerateTransitions(ctxt, previous, + (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) return(-1); previous = ctxt->state; ctxt->atom = NULL; @@ -5132,8 +5156,8 @@ xmlFAParseBranch(xmlRegParserCtxtPtr ctxt) { while ((ret != 0) && (ctxt->error == 0)) { ret = xmlFAParsePiece(ctxt); if (ret != 0) { - if (xmlFAGenerateTransitions(ctxt, previous, NULL, - ctxt->atom) < 0) + if (xmlFAGenerateTransitions(ctxt, previous, + (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0) return(-1); previous = ctxt->state; ctxt->atom = NULL; @@ -5156,7 +5180,7 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { /* if not top start should have been generated by an epsilon trans */ start = ctxt->state; ctxt->end = NULL; - xmlFAParseBranch(ctxt); + xmlFAParseBranch(ctxt, NULL); if (top) { #ifdef DEBUG_REGEXP_GRAPH printf("State %d is final\n", ctxt->state->no); @@ -5172,15 +5196,7 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { NEXT; ctxt->state = start; ctxt->end = NULL; - xmlFAParseBranch(ctxt); - if (top) { - ctxt->state->type = XML_REGEXP_FINAL_STATE; -#ifdef DEBUG_REGEXP_GRAPH - printf("State %d is final\n", ctxt->state->no); -#endif - } else { - xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, end); - } + xmlFAParseBranch(ctxt, end); } if (!top) { ctxt->state = end; |