aboutsummaryrefslogtreecommitdiffstats
path: root/xmlregexp.c
diff options
context:
space:
mode:
authorDaniel Veillard <veillard@src.gnome.org>2005-08-22 12:07:04 +0000
committerDaniel Veillard <veillard@src.gnome.org>2005-08-22 12:07:04 +0000
commit465a000b1080427bd62d89a925409b7db78616ac (patch)
treec937a90fe18adfb2b7a053046491360b3e0d649a /xmlregexp.c
parent630215baf8ba0b871603f6f2ccc3bb68a8b45697 (diff)
downloadandroid_external_libxml2-465a000b1080427bd62d89a925409b7db78616ac.tar.gz
android_external_libxml2-465a000b1080427bd62d89a925409b7db78616ac.tar.bz2
android_external_libxml2-465a000b1080427bd62d89a925409b7db78616ac.zip
fixed an uninitialized variable extended the API to add the parser,
* valid.c: fixed an uninitialized variable * xmlregexp.c include/libxml/xmlregexp.h: extended the API to add the parser, serializer and some debugging * include/libxml/xmlversion.h.in: made the new support compiled by default if Schemas is included * testRegexp.c: cleanup and integration of the first part of the new code with a special switch * xmllint.c: show up Expr in --version if compiled in * include/libxml/tree.h: moved the xmlBuffer definition up Daniel
Diffstat (limited to 'xmlregexp.c')
-rw-r--r--xmlregexp.c355
1 files changed, 336 insertions, 19 deletions
diff --git a/xmlregexp.c b/xmlregexp.c
index 7214cc22..b4953e08 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -5491,15 +5491,6 @@ xmlAutomataIsDeterminist(xmlAutomataPtr am) {
* Formal Expression handling code *
* *
************************************************************************/
-static void xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr exp, int glob);
-#define PRINT_EXP(exp) { \
- xmlBufferPtr xmlExpBuf; \
- xmlExpBuf = xmlBufferCreate(); \
- xmlExpDump(xmlExpBuf, exp, 0); \
- xmlBufferWriteChar(xmlExpBuf, "\n"); \
- xmlBufferDump(stdout, xmlExpBuf); \
- xmlBufferFree(xmlExpBuf); \
-}
/************************************************************************
* *
* Expression handling context *
@@ -5515,7 +5506,6 @@ struct _xmlExpCtxt {
const char *expr;
const char *cur;
int nb_cons;
- int nb_del;
int tabSize;
};
@@ -5583,14 +5573,22 @@ xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
* Structure associated to an expression node *
* *
************************************************************************/
-typedef enum {
- XML_EXP_EMPTY = 0,
- XML_EXP_FORBID = 1,
- XML_EXP_ATOM = 2,
- XML_EXP_SEQ = 3,
- XML_EXP_OR = 4,
- XML_EXP_COUNT = 5
-} xmlExpNodeType;
+#define MAX_NODES 10000
+
+/* #define DEBUG_DERIV */
+
+/*
+ * TODO:
+ * - Wildcards
+ * - public API for creation
+ *
+ * Started
+ * - regression testing
+ *
+ * Done
+ * - split into module and test tool
+ * - memleaks
+ */
typedef enum {
XML_EXP_NILABLE = (1 << 0)
@@ -5993,7 +5991,6 @@ xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
xmlExpFree(ctxt, exp->exp_left);
}
xmlFree(exp);
- ctxt->nb_del++;
ctxt->nb_nodes--;
}
}
@@ -6877,6 +6874,326 @@ xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
xmlExpFree(ctxt, tmp);
return(0);
}
+
+/************************************************************************
+ * *
+ * Parsing expression *
+ * *
+ ************************************************************************/
+
+static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
+
+#undef CUR
+#define CUR (*ctxt->cur)
+#undef NEXT
+#define NEXT ctxt->cur++;
+#undef IS_BLANK
+#define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
+#define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
+
+static int
+xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
+ int ret = 0;
+
+ SKIP_BLANKS
+ if (CUR == '*') {
+ NEXT
+ return(-1);
+ }
+ if ((CUR < '0') || (CUR > '9'))
+ return(-1);
+ while ((CUR >= '0') && (CUR <= '9')) {
+ ret = ret * 10 + (CUR - '0');
+ NEXT
+ }
+ return(ret);
+}
+
+static xmlExpNodePtr
+xmlExpParseOr(xmlExpCtxtPtr ctxt) {
+ const char *base;
+ xmlExpNodePtr ret;
+ const xmlChar *val;
+
+ SKIP_BLANKS
+ base = ctxt->cur;
+ if (*ctxt->cur == '(') {
+ NEXT
+ ret = xmlExpParseExpr(ctxt);
+ SKIP_BLANKS
+ if (*ctxt->cur != ')') {
+ fprintf(stderr, "unbalanced '(' : %s\n", base);
+ xmlExpFree(ctxt, ret);
+ return(NULL);
+ }
+ NEXT;
+ SKIP_BLANKS
+ goto parse_quantifier;
+ }
+ while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
+ (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
+ (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
+ NEXT;
+ val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
+ if (val == NULL)
+ return(NULL);
+ ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
+ if (ret == NULL)
+ return(NULL);
+ SKIP_BLANKS
+parse_quantifier:
+ if (CUR == '{') {
+ int min, max;
+
+ NEXT
+ min = xmlExpParseNumber(ctxt);
+ if (min < 0) {
+ xmlExpFree(ctxt, ret);
+ return(NULL);
+ }
+ SKIP_BLANKS
+ if (CUR == ',') {
+ NEXT
+ max = xmlExpParseNumber(ctxt);
+ SKIP_BLANKS
+ } else
+ max = min;
+ if (CUR != '}') {
+ xmlExpFree(ctxt, ret);
+ return(NULL);
+ }
+ NEXT
+ ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
+ min, max);
+ SKIP_BLANKS
+ } else if (CUR == '?') {
+ NEXT
+ ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
+ 0, 1);
+ SKIP_BLANKS
+ } else if (CUR == '+') {
+ NEXT
+ ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
+ 1, -1);
+ SKIP_BLANKS
+ } else if (CUR == '*') {
+ NEXT
+ ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
+ 0, -1);
+ SKIP_BLANKS
+ }
+ return(ret);
+}
+
+
+static xmlExpNodePtr
+xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
+ xmlExpNodePtr ret, right;
+
+ ret = xmlExpParseOr(ctxt);
+ SKIP_BLANKS
+ while (CUR == '|') {
+ NEXT
+ right = xmlExpParseOr(ctxt);
+ if (right == NULL) {
+ xmlExpFree(ctxt, ret);
+ return(NULL);
+ }
+ ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
+ if (ret == NULL)
+ return(NULL);
+ }
+ return(ret);
+}
+
+static xmlExpNodePtr
+xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
+ xmlExpNodePtr ret, right;
+
+ ret = xmlExpParseSeq(ctxt);
+ SKIP_BLANKS
+ while (CUR == ',') {
+ NEXT
+ right = xmlExpParseSeq(ctxt);
+ if (right == NULL) {
+ xmlExpFree(ctxt, ret);
+ return(NULL);
+ }
+ ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
+ if (ret == NULL)
+ return(NULL);
+ }
+ return(ret);
+}
+
+/**
+ * xmlExpParse:
+ * @ctxt: the expressions context
+ * @expr: the 0 terminated string
+ *
+ * Minimal parser for regexps, it understand the following constructs
+ * - string terminals
+ * - choice operator |
+ * - sequence operator ,
+ * - subexpressions (...)
+ * - usual cardinality operators + * and ?
+ * - finite sequences { min, max }
+ * - infinite sequences { min, * }
+ * There is minimal checkings made especially no checking on strings values
+ *
+ * Returns a new expression or NULL in case of failure
+ */
+xmlExpNodePtr
+xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
+ xmlExpNodePtr ret;
+
+ ctxt->expr = expr;
+ ctxt->cur = expr;
+
+ ret = xmlExpParseExpr(ctxt);
+ SKIP_BLANKS
+ if (*ctxt->cur != 0) {
+ xmlExpFree(ctxt, ret);
+ return(NULL);
+ }
+ return(ret);
+}
+
+static void
+xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
+ xmlExpNodePtr c;
+
+ if (expr == NULL) return;
+ if (glob) xmlBufferWriteChar(buf, "(");
+ switch (expr->type) {
+ case XML_EXP_EMPTY:
+ xmlBufferWriteChar(buf, "empty");
+ break;
+ case XML_EXP_FORBID:
+ xmlBufferWriteChar(buf, "forbidden");
+ break;
+ case XML_EXP_ATOM:
+ xmlBufferWriteCHAR(buf, expr->exp_str);
+ break;
+ case XML_EXP_SEQ:
+ c = expr->exp_left;
+ if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
+ xmlExpDumpInt(buf, c, 1);
+ else
+ xmlExpDumpInt(buf, c, 0);
+ xmlBufferWriteChar(buf, " , ");
+ c = expr->exp_right;
+ if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
+ xmlExpDumpInt(buf, c, 1);
+ else
+ xmlExpDumpInt(buf, c, 0);
+ break;
+ case XML_EXP_OR:
+ c = expr->exp_left;
+ if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
+ xmlExpDumpInt(buf, c, 1);
+ else
+ xmlExpDumpInt(buf, c, 0);
+ xmlBufferWriteChar(buf, " | ");
+ c = expr->exp_right;
+ if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
+ xmlExpDumpInt(buf, c, 1);
+ else
+ xmlExpDumpInt(buf, c, 0);
+ break;
+ case XML_EXP_COUNT: {
+ char rep[40];
+
+ c = expr->exp_left;
+ if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
+ xmlExpDumpInt(buf, c, 1);
+ else
+ xmlExpDumpInt(buf, c, 0);
+ if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
+ rep[0] = '?';
+ rep[1] = 0;
+ } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
+ rep[0] = '*';
+ rep[1] = 0;
+ } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
+ rep[0] = '+';
+ rep[1] = 0;
+ } else if (expr->exp_max == expr->exp_min) {
+ snprintf(rep, 39, "{%d}", expr->exp_min);
+ } else if (expr->exp_max < 0) {
+ snprintf(rep, 39, "{%d,inf}", expr->exp_min);
+ } else {
+ snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
+ }
+ rep[39] = 0;
+ xmlBufferWriteChar(buf, rep);
+ break;
+ }
+ default:
+ fprintf(stderr, "Error in tree\n");
+ }
+ if (glob)
+ xmlBufferWriteChar(buf, ")");
+}
+/**
+ * xmlExpDump:
+ * @buf: a buffer to receive the output
+ * @expr: the compiled expression
+ *
+ * Serialize the expression as compiled to the buffer
+ */
+void
+xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr exp) {
+ if ((buf == NULL) || (exp == NULL))
+ return;
+ xmlExpDumpInt(buf, exp, 0);
+}
+
+/**
+ * xmlExpMaxToken:
+ * @expr: a compiled expression
+ *
+ * Indicate the maximum number of input a expression can accept
+ *
+ * Returns the maximum length or -1 in case of error
+ */
+int
+xmlExpMaxToken(xmlExpNodePtr expr) {
+ if (expr == NULL)
+ return(-1);
+ return(expr->c_max);
+}
+
+/**
+ * xmlExpCtxtNbNodes:
+ * @ctxt: an expression context
+ *
+ * Debugging facility provides the number of allocated nodes at a that point
+ *
+ * Returns the number of nodes in use or -1 in case of error
+ */
+int
+xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
+ if (ctxt == NULL)
+ return(-1);
+ return(ctxt->nb_nodes);
+}
+
+/**
+ * xmlExpCtxtNbCons:
+ * @ctxt: an expression context
+ *
+ * Debugging facility provides the number of allocated nodes over lifetime
+ *
+ * Returns the number of nodes ever allocated or -1 in case of error
+ */
+int
+xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
+ if (ctxt == NULL)
+ return(-1);
+ return(ctxt->nb_cons);
+}
+
#endif /* LIBXML_EXPR_ENABLED */
#define bottom_xmlregexp
#include "elfgcchack.h"