aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog7
-rw-r--r--doc/xml.html161
-rw-r--r--entities.c2
-rw-r--r--entities.h1
-rw-r--r--include/libxml/entities.h1
-rw-r--r--tester.c42
-rw-r--r--tree.c204
7 files changed, 318 insertions, 100 deletions
diff --git a/ChangeLog b/ChangeLog
index 21fb605a..07a9ce96 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Thu Oct 29 00:48:45 EST 1998 Daniel Veillard <Daniel.Veillard@w3.org>
+
+ * tree.[ch]: bug fixing
+ * entities.[ch]: defined a specific type for predefined entities
+ * doc/xml.html: more documentation on the library, how to use it,
+ overview of the interfaces.
+
Wed Oct 28 17:56:35 EST 1998 Daniel Veillard <Daniel.Veillard@w3.org>
* tree.[ch]: more cleanup on the API, made the tree mor conformant.
diff --git a/doc/xml.html b/doc/xml.html
index 93d85ff3..5500349c 100644
--- a/doc/xml.html
+++ b/doc/xml.html
@@ -2,7 +2,7 @@
"http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<head>
-<title>No title</title>
+<title>The XML library for Gnome</title>
<meta name="GENERATOR" content="amaya V1.3b">
</head>
<body bgcolor="#ffffff">
@@ -75,18 +75,22 @@ standalone=true
content= too
ELEMENT head
ELEMENT title
- content=Welcome to Gnome
+ TEXT
+ content=Welcome to Gnome
ELEMENT chapter
ELEMENT title
- content=The Linux adventure
+ TEXT
+ content=The Linux adventure
ELEMENT p
- content=bla bla bla ...
+ TEXT
+ content=bla bla bla ...
ELEMENT image
ATTRIBUTE href
TEXT
content=linus.gif
ELEMENT p
- content=...</pre>
+ TEXT
+ content=...</pre>
<p>
This should be useful to learn the internal representation model.</p>
@@ -106,19 +110,19 @@ Usually, the first thing to do is to read an XML input, the parser accepts to
parse both memory mapped documents or direct files. The functions are defined
in "parser.h":</p>
<dl>
-<dt>xmlDocPtr xmlParseMemory(char *buffer, int size);</dt>
+<dt><code>xmlDocPtr xmlParseMemory(char *buffer, int size);</code></dt>
<dd><p>
parse a zero terminated string containing the document</p>
</dd>
</dl>
<dl>
-<dt>xmlDocPtr xmlParseFile(const char *filename);</dt>
+<dt><code>xmlDocPtr xmlParseFile(const char *filename);</code></dt>
<dd><p>
parse an XML document contained in a file (possibly compressed)</p>
</dd>
</dl>
<p>
- This returns a pointer to the document structure (or NULL in case of
+This returns a pointer to the document structure (or NULL in case of
failure).</p>
<p>
A couple of comments can be made, first this mean that the parser is
@@ -126,25 +130,156 @@ memory-hungry, first to load the document in memory, second to build the tree.
Reading a document without building the tree will be possible in the future by
pluggin the code to the SAX interface (see SAX.c).</p>
+<h3>Building a tree from scratch</h3>
+<p>
+The other way to get an XML tree in memory is by building it. Basically there
+is a set of functions dedicated to building new elements; those are also
+described in "tree.h". Here is, for example, the piece of code producing the
+example used before:</p>
+<pre> xmlDocPtr doc;
+ xmlNodePtr tree, subtree;
+
+ doc = xmlNewDoc("1.0");
+ doc->root = xmlNewDocNode(doc, NULL, "EXAMPLE", NULL);
+ xmlSetProp(doc->root, "prop1", "gnome is great");
+ xmlSetProp(doc->root, "prop2", "&amp;linux; too");
+ tree = xmlNewChild(doc->root, NULL, "head", NULL);
+ subtree = xmlNewChild(tree, NULL, "title", "Welcome to Gnome");
+ tree = xmlNewChild(doc->root, NULL, "chapter", NULL);
+ subtree = xmlNewChild(tree, NULL, "title", "The Linux adventure");
+ subtree = xmlNewChild(tree, NULL, "p", "bla bla bla ...");
+ subtree = xmlNewChild(tree, NULL, "image", NULL);
+ xmlSetProp(subtree, "href", "linus.gif");</pre>
+<p>
+Not really rocket science ...</p>
+
<h3>Traversing the tree</h3>
<p>
Basically by including "tree.h" your code has access to the internal structure
of all the element of the tree. The names should be somewhat simple like
<strong>parent</strong>, <strong>childs</strong>, <strong>next</strong>,
-<strong>prev</strong>, <strong>properties</strong>, etc... </p>
+<strong>prev</strong>, <strong>properties</strong>, etc... For example still
+with the previous example:</p>
+<pre><code>doc->root->childs->childs</code></pre>
+<p>
+points to the title element,</p>
+<pre>doc->root->childs->next->childs->childs</pre>
+<p>
+points to the text node containing the chapter title "The Linux adventure"
+and</p>
+<pre>doc->root->properties->next->val</pre>
+<p>
+points to the entity reference containing the value of "&amp;linux;" at the
+beginning of the second attribute of the root element "EXAMPLE".</p>
<h3>Modifying the tree</h3>
+<p>
+Functions are provided to read and write the document content:</p>
+<dl>
+<dt><code>xmlAttrPtr xmlSetProp(xmlNodePtr node, const CHAR *name, const CHAR
+*value);</code></dt>
+<dd><p>
+This sets (or changes) an attribute carried by an ELEMENT node; the value can
+be NULL.</p>
+</dd>
+</dl>
+<dl>
+<dt><code>const CHAR *xmlGetProp(xmlNodePtr node, const CHAR
+*name);</code></dt>
+<dd><p>
+This function returns a pointer to the property content; note that no extra
+copy is made.</p>
+</dd>
+</dl>
+<p>
+Two functions must be used to read and write the text associated with
+elements:</p>
+<dl>
+<dt><code>xmlNodePtr xmlStringGetNodeList(xmlDocPtr doc, const CHAR
+*value);</code></dt>
+<dd><p>
+This function takes an "external" string and converts it to one text node or
+possibly to a list of entity and text nodes. All non-predefined entity
+references like &amp;Gnome; will be stored internally as entity nodes, hence
+the result of the function may not be a single node.</p>
+</dd>
+</dl>
+<dl>
+<dt><code>CHAR *xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int
+inLine);</code></dt>
+<dd><p>
+This is the dual function, which generates a new string containing the content
+of the text and entity nodes. Note the extra argument inLine: if set to 1,
+instead of returning the &amp;Gnome; XML encoding in the string, it will
+substitute it with its value, say "GNU Network Object Model Environment". Set
+it if you want to use the string for non-XML usage like a user interface.</p>
+</dd>
+</dl>
<h3>Saving a tree</h3>
+<p>
+Basically 3 options are possible:</p>
+<dl>
+<dt><code>void xmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int
+*size);</code></dt>
+<dd><p>
+returns a buffer where the document has been saved</p>
+</dd>
+</dl>
+<dl>
+<dt><code>extern void xmlDocDump(FILE *f, xmlDocPtr doc);</code></dt>
+<dd><p>
+dumps the document to an open file stream</p>
+</dd>
+</dl>
+<dl>
+<dt><code>int xmlSaveFile(const char *filename, xmlDocPtr cur);</code></dt>
+<dd><p>
+save the document to a file. In that case the compression interface is
+triggered if turned on</p>
+</dd>
+</dl>
+
+<h3>Compression</h3>
+<p>
+The library transparently handles compression when doing file-based accesses;
+the level of compression on saves can be tuned either globally or individually
+for one file:</p>
+<dl>
+<dt><code>int xmlGetDocCompressMode (xmlDocPtr doc);</code></dt>
+<dd><p>
+Get the document compression ratio (0-9)</p>
+</dd>
+</dl>
+<dl>
+<dt><code>void xmlSetDocCompressMode (xmlDocPtr doc, int mode);</code></dt>
+<dd><p>
+Set the document compression ratio</p>
+</dd>
+</dl>
+<dl>
+<dt><code>int xmlGetCompressMode(void);</code></dt>
+<dd><p>
+Get the default compression ratio</p>
+</dd>
+</dl>
+<dl>
+<dt><code>void xmlSetCompressMode(int mode);</code></dt>
+<dd><p>
+set the default compression ratio</p>
+</dd>
+</dl>
-<h2><a name="DOM">DOM interfaces</a></h2>
+<h2><a name="DOM">DOM Principles</a></h2>
<p>
<a href="http://www.w3.org/DOM/">DOM</a> stands for the <em>Document Object
Model</em> this is an API for accessing XML or HTML structured documents.
Native support for DOM in Gnome is on the way (module gnome-dom), and it will
-be based on gnome-xml. DOM defiles a set of IDL (or Java) interfaces allowing
-to traverse and manipulate a document. The DOM library will allow accessing
-and modifying "live" documents presents on other programs like this:</p>
+be based on gnome-xml. This will be a far cleaner interface to manipulate XML
+files within Gnome since it won't expose the internal structure. DOM defines a
+set of IDL (or Java) interfaces for traversing and manipulating a
+document. The DOM library will allow accessing and modifying "live" documents
+present in other programs like this:</p>
<p>
<img src="DOM.gif" alt=" DOM.gif "></p>
<p>
diff --git a/entities.c b/entities.c
index 351746ce..5d9d88a4 100644
--- a/entities.c
+++ b/entities.c
@@ -138,7 +138,7 @@ void xmlInitializePredefinedEntities(void) {
out = &value[0];
for (;(*out++ = (CHAR) *in);)in++;
xmlAddEntity(xmlPredefinedEntities, (const CHAR *) &name[0],
- XML_INTERNAL_GENERAL_ENTITY, NULL, NULL,
+ XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
&value[0]);
}
}
diff --git a/entities.h b/entities.h
index 6f971c08..f6b03c3e 100644
--- a/entities.h
+++ b/entities.h
@@ -20,6 +20,7 @@ extern "C" {
#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3
#define XML_INTERNAL_PARAMETER_ENTITY 4
#define XML_EXTERNAL_PARAMETER_ENTITY 5
+#define XML_INTERNAL_PREDEFINED_ENTITY 6
/*
* An unit of storage for an entity, contains the string, the value
diff --git a/include/libxml/entities.h b/include/libxml/entities.h
index 6f971c08..f6b03c3e 100644
--- a/include/libxml/entities.h
+++ b/include/libxml/entities.h
@@ -20,6 +20,7 @@ extern "C" {
#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3
#define XML_INTERNAL_PARAMETER_ENTITY 4
#define XML_EXTERNAL_PARAMETER_ENTITY 5
+#define XML_INTERNAL_PREDEFINED_ENTITY 6
/*
* An unit of storage for an entity, contains the string, the value
diff --git a/tester.c b/tester.c
index 07a41174..56d74bea 100644
--- a/tester.c
+++ b/tester.c
@@ -56,6 +56,42 @@ static CHAR buffer[] =
\n\
";
+/************************************************************************
+ * *
+ * Debug *
+ * *
+ ************************************************************************/
+
+int treeTest(void) {
+ /* Build a small XML document from scratch: a root "EXAMPLE" element with two
+ * properties, then a "head" and a "chapter" subtree. Always returns 0.
+ * NOTE(review): no xmlNew* result is checked for NULL before being used. */
+ xmlDocPtr doc;
+ xmlNodePtr tree, subtree;
+
+ doc = xmlNewDoc("1.0");
+ doc->root = xmlNewDocNode(doc, NULL, "EXAMPLE", NULL);
+ xmlSetProp(doc->root, "prop1", "gnome is great");
+ xmlSetProp(doc->root, "prop2", "&linux; too"); /* value holds a "&linux;" entity reference */
+ tree = xmlNewChild(doc->root, NULL, "head", NULL);
+ subtree = xmlNewChild(tree, NULL, "title", "Welcome to Gnome");
+ tree = xmlNewChild(doc->root, NULL, "chapter", NULL);
+ subtree = xmlNewChild(tree, NULL, "title", "The Linux adventure");
+ subtree = xmlNewChild(tree, NULL, "p", "bla bla bla ...");
+ subtree = xmlNewChild(tree, NULL, "image", NULL);
+ xmlSetProp(subtree, "href", "linus.gif"); /* subtree is the "image" element here */
+
+ /*
+ * Dump the freshly built document to stdout with xmlDocDump.
+ */
+ xmlDocDump(stdout, doc);
+
+ /*
+ * Free the document with xmlFreeDoc before returning.
+ */
+ xmlFreeDoc(doc);
+ return(0);
+}
void parseAndPrintFile(char *filename) {
xmlDocPtr doc;
@@ -111,8 +147,12 @@ int main(int argc, char **argv) {
else
debug++;
}
- } else
+ } else {
+ printf("\nFirst test for the parser, with errors\n");
parseAndPrintBuffer(buffer);
+ printf("\nBuilding a tree from scratch and printing it\n");
+ treeTest();
+ }
return(0);
}
diff --git a/tree.c b/tree.c
index aae8dbe2..a477934b 100644
--- a/tree.c
+++ b/tree.c
@@ -372,22 +372,34 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const CHAR *value, int len) {
CHAR *val;
const CHAR *cur = value;
const CHAR *q;
+ xmlEntityPtr ent;
if (value == NULL) return(NULL);
q = cur;
while ((*cur != 0) && (cur - value < len)) {
if (*cur == '&') {
+ /*
+ * Save the current text.
+ */
if (cur != q) {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL)
- last = ret = node;
- else {
- last->next = node;
- last = node;
+ if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
+ xmlNodeAddContentLen(last, q, cur - q);
+ } else {
+ node = xmlNewDocTextLen(doc, q, cur - q);
+ if (node == NULL) return(ret);
+ if (last == NULL)
+ last = ret = node;
+ else {
+ last->next = node;
+ node->prev = last;
+ last = node;
+ }
}
}
+ /*
+ * Read the entity string
+ */
cur++;
q = cur;
while ((*cur != 0) && (cur - value < len) && (*cur != ';')) cur++;
@@ -397,14 +409,32 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const CHAR *value, int len) {
return(ret);
}
if (cur != q) {
+ /*
+ * Predefined entities don't generate nodes
+ */
val = xmlStrndup(q, cur - q);
- node = xmlNewReference(doc, val);
- if (node == NULL) return(ret);
- if (last == NULL)
- last = ret = node;
- else {
- last->next = node;
- last = node;
+ ent = xmlGetDocEntity(doc, val);
+ if ((ent != NULL) &&
+ (ent->type == XML_INTERNAL_PREDEFINED_ENTITY)) {
+ if (last == NULL) {
+ node = xmlNewDocText(doc, ent->content);
+ last = ret = node;
+ } else
+ xmlNodeAddContent(last, ent->content);
+
+ } else {
+ /*
+ * Create a new REFERENCE_REF node
+ */
+ node = xmlNewReference(doc, val);
+ if (node == NULL) return(ret);
+ if (last == NULL)
+ last = ret = node;
+ else {
+ last->next = node;
+ node->prev = last;
+ last = node;
+ }
}
free(val);
}
@@ -414,13 +444,21 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const CHAR *value, int len) {
cur++;
}
if (cur != q) {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL)
- last = ret = node;
- else {
- last->next = node;
- last = node;
+ /*
+ * Handle the last piece of text.
+ */
+ if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
+ xmlNodeAddContentLen(last, q, cur - q);
+ } else {
+ node = xmlNewDocTextLen(doc, q, cur - q);
+ if (node == NULL) return(ret);
+ if (last == NULL)
+ last = ret = node;
+ else {
+ last->next = node;
+ node->prev = last;
+ last = node;
+ }
}
}
return(ret);
@@ -442,22 +480,34 @@ xmlStringGetNodeList(xmlDocPtr doc, const CHAR *value) {
CHAR *val;
const CHAR *cur = value;
const CHAR *q;
+ xmlEntityPtr ent;
if (value == NULL) return(NULL);
q = cur;
while (*cur != 0) {
if (*cur == '&') {
+ /*
+ * Save the current text.
+ */
if (cur != q) {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL)
- last = ret = node;
- else {
- last->next = node;
- last = node;
+ if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
+ xmlNodeAddContentLen(last, q, cur - q);
+ } else {
+ node = xmlNewDocTextLen(doc, q, cur - q);
+ if (node == NULL) return(ret);
+ if (last == NULL)
+ last = ret = node;
+ else {
+ last->next = node;
+ node->prev = last;
+ last = node;
+ }
}
}
+ /*
+ * Read the entity string
+ */
cur++;
q = cur;
while ((*cur != 0) && (*cur != ';')) cur++;
@@ -467,14 +517,33 @@ xmlStringGetNodeList(xmlDocPtr doc, const CHAR *value) {
return(ret);
}
if (cur != q) {
+ /*
+ * Predefined entities don't generate nodes
+ */
val = xmlStrndup(q, cur - q);
- node = xmlNewReference(doc, val);
- if (node == NULL) return(ret);
- if (last == NULL)
- last = ret = node;
- else {
- last->next = node;
- last = node;
+ ent = xmlGetDocEntity(doc, val);
+ if ((ent != NULL) &&
+ (ent->type == XML_INTERNAL_PREDEFINED_ENTITY)) {
+ if (last == NULL) {
+ node = xmlNewDocText(doc, ent->content);
+ last = ret = node;
+ } else
+ xmlNodeAddContent(last, ent->content);
+
+ } else {
+ /*
+ * Create a new REFERENCE_REF node
+ */
+ val = xmlStrndup(q, cur - q);
+ node = xmlNewReference(doc, val);
+ if (node == NULL) return(ret);
+ if (last == NULL)
+ last = ret = node;
+ else {
+ last->next = node;
+ node->prev = last;
+ last = node;
+ }
}
free(val);
}
@@ -484,13 +553,21 @@ xmlStringGetNodeList(xmlDocPtr doc, const CHAR *value) {
cur++;
}
if (cur != q) {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL)
- last = ret = node;
- else {
- last->next = node;
- last = node;
+ /*
+ * Handle the last piece of text.
+ */
+ if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
+ xmlNodeAddContentLen(last, q, cur - q);
+ } else {
+ node = xmlNewDocTextLen(doc, q, cur - q);
+ if (node == NULL) return(ret);
+ if (last == NULL)
+ last = ret = node;
+ else {
+ last->next = node;
+ node->prev = last;
+ last = node;
+ }
}
}
return(ret);
@@ -2129,46 +2206,3 @@ xmlSaveFile(const char *filename, xmlDocPtr cur) {
return(ret * sizeof(CHAR));
}
-/************************************************************************
- * *
- * Debug *
- * *
- ************************************************************************/
-
-#ifdef STANDALONE
-int main(void) {
- xmlDocPtr doc;
- xmlNodePtr tree, subtree;
- xmlNsPtr ns1;
- xmlNsPtr ns2;
-
- /*
- * build a fake XML document
- */
- doc = xmlNewDoc("1.0");
- ns1 = xmlNewNs(doc, "http://www.ietf.org/standards/dav/", "D");
- ns2 = xmlNewNs(doc, "http://www.w3.com/standards/z39.50/", "Z");
- doc->root = xmlNewDocNode(doc, ns1, "multistatus", NULL);
- tree = xmlNewChild(doc->root, NULL, "response", NULL);
- subtree = xmlNewChild(tree, NULL, "prop", NULL);
- xmlNewChild(subtree, ns2, "Authors", NULL);
- subtree = xmlNewChild(tree, NULL, "status", "HTTP/1.1 420 Method Failure");
- tree = xmlNewChild(doc->root, NULL, "response", NULL);
- subtree = xmlNewChild(tree, NULL, "prop", NULL);
- xmlNewChild(subtree, ns2, "Copyright-Owner", NULL);
- subtree = xmlNewChild(tree, NULL, "status", "HTTP/1.1 409 Conflict");
- tree = xmlNewChild(doc->root, NULL, "responsedescription",
- "Copyright Owner can not be deleted or altered");
-
- /*
- * print it.
- */
- xmlDocDump(stdout, doc);
-
- /*
- * free it.
- */
- xmlFreeDoc(doc);
- return(0);
-}
-#endif