aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--tree.c12
-rw-r--r--xmlregexp.c37
3 files changed, 50 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index 8f5fb77d..8f2d7813 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Wed Nov 1 16:33:10 CET 2006 Daniel Veillard <daniel@veillard.com>
+
+ * tree.c: applied documentation patches from Markus Keim
+ * xmlregexp.c: fixed one bug and added a couple of optimisations
+ while working on bug #362989
+
Fri Oct 27 14:54:07 CEST 2006 Daniel Veillard <daniel@veillard.com>
* HTMLparser.c: applied a reworked version of Usamah Malik patch
diff --git a/tree.c b/tree.c
index d5b9fecb..ffbcabd3 100644
--- a/tree.c
+++ b/tree.c
@@ -5199,6 +5199,9 @@ xmlNodeGetContent(xmlNodePtr cur)
* @content: the new value of the content
*
* Replace the content of a node.
+ * NOTE: @content is supposed to be a piece of XML CDATA, so it allows entity
+ * references, but XML special chars need to be escaped first by using
+ * xmlEncodeEntitiesReentrant() resp. xmlEncodeSpecialChars().
*/
void
xmlNodeSetContent(xmlNodePtr cur, const xmlChar *content) {
@@ -5273,6 +5276,9 @@ xmlNodeSetContent(xmlNodePtr cur, const xmlChar *content) {
* @len: the size of @content
*
* Replace the content of a node.
+ * NOTE: @content is supposed to be a piece of XML CDATA, so it allows entity
+ * references, but XML special chars need to be escaped first by using
+ * xmlEncodeEntitiesReentrant() resp. xmlEncodeSpecialChars().
*/
void
xmlNodeSetContentLen(xmlNodePtr cur, const xmlChar *content, int len) {
@@ -5344,6 +5350,9 @@ xmlNodeSetContentLen(xmlNodePtr cur, const xmlChar *content, int len) {
* @len: the size of @content
*
* Append the extra substring to the node content.
+ * NOTE: In contrast to xmlNodeSetContentLen(), @content is supposed to be
+ * raw text, so unescaped XML special chars are allowed, entity
+ * references are not supported.
*/
void
xmlNodeAddContentLen(xmlNodePtr cur, const xmlChar *content, int len) {
@@ -5416,6 +5425,9 @@ xmlNodeAddContentLen(xmlNodePtr cur, const xmlChar *content, int len) {
* @content: extra content
*
* Append the extra substring to the node content.
+ * NOTE: In contrast to xmlNodeSetContent(), @content is supposed to be
+ * raw text, so unescaped XML special chars are allowed, entity
+ * references are not supported.
*/
void
xmlNodeAddContent(xmlNodePtr cur, const xmlChar *content) {
diff --git a/xmlregexp.c b/xmlregexp.c
index e7d519eb..9719cd54 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -145,7 +145,8 @@ typedef enum {
XML_REGEXP_START_STATE = 1,
XML_REGEXP_FINAL_STATE,
XML_REGEXP_TRANS_STATE,
- XML_REGEXP_SINK_STATE
+ XML_REGEXP_SINK_STATE,
+ XML_REGEXP_UNREACH_STATE
} xmlRegStateType;
typedef enum {
@@ -1709,6 +1710,8 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
continue;
if (state->nbTrans != 1)
continue;
+ if (state->type == XML_REGEXP_UNREACH_STATE)
+ continue;
/* is the only transition out a basic transition */
if ((state->trans[0].atom == NULL) &&
(state->trans[0].to >= 0) &&
@@ -1731,13 +1734,20 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
tmp = ctxt->states[state->transTo[i]];
for (j = 0;j < tmp->nbTrans;j++) {
if (tmp->trans[j].to == statenr) {
- tmp->trans[j].to = newto;
#ifdef DEBUG_REGEXP_GRAPH
printf("Changed transition %d on %d to go to %d\n",
j, tmp->no, newto);
#endif
+#if 0
+ tmp->trans[j].to = newto;
xmlRegStateAddTransTo(ctxt, ctxt->states[newto],
tmp->no);
+#endif
+ tmp->trans[j].to = -1;
+ xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
+ ctxt->states[newto],
+ tmp->trans[j].counter,
+ tmp->trans[j].count);
}
}
}
@@ -1760,6 +1770,7 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
/* eliminate the transition completely */
state->nbTrans = 0;
+ state->type = XML_REGEXP_UNREACH_STATE;
}
@@ -1779,7 +1790,21 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
if (ctxt->states == NULL) return;
+ /*
+ * Eliminate simple epsilon transition and the associated unreachable
+ * states.
+ */
xmlFAEliminateSimpleEpsilonTransitions(ctxt);
+ for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+ state = ctxt->states[statenr];
+ if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Removed unreachable state %d\n", statenr);
+#endif
+ xmlRegFreeState(state);
+ ctxt->states[statenr] = NULL;
+ }
+ }
has_epsilon = 0;
@@ -1812,8 +1837,9 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
printf("Found epsilon trans %d from %d to %d\n",
transnr, statenr, newto);
#endif
- state->mark = XML_REGEXP_MARK_START;
has_epsilon = 1;
+ state->trans[transnr].to = -2;
+ state->mark = XML_REGEXP_MARK_START;
xmlFAReduceEpsilonTransitions(ctxt, statenr,
newto, state->trans[transnr].counter);
state->mark = XML_REGEXP_MARK_NORMAL;
@@ -2424,7 +2450,7 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
* check transitions conflicting with the one looked at
*/
if (t1->atom == NULL) {
- if (t1->to == -1)
+ if (t1->to < 0)
continue;
res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
to, atom);
@@ -2875,7 +2901,8 @@ xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
int i;
printf(": ");
for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
- printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
+ printf("%s ", (const char *)
+ exec->inputStack[exec->inputStackNr - (i + 1)].value);
} else {
printf(": %s", &(exec->inputString[exec->index]));
}