diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | doc/xml.html | 161 | ||||
-rw-r--r-- | entities.c | 2 | ||||
-rw-r--r-- | entities.h | 1 | ||||
-rw-r--r-- | include/libxml/entities.h | 1 | ||||
-rw-r--r-- | tester.c | 42 | ||||
-rw-r--r-- | tree.c | 204 |
7 files changed, 318 insertions, 100 deletions
@@ -1,3 +1,10 @@ +Thu Oct 29 00:48:45 EST 1998 Daniel Veillard <Daniel.Veillard@w3.org> + + * tree.[ch]: bug fixing + * entities.[ch]: defined a specific type for predefined entities + * doc/xml.html: more documentation on the library, how to use it, + overview of the interfaces. + Wed Oct 28 17:56:35 EST 1998 Daniel Veillard <Daniel.Veillard@w3.org> * tree.[ch]: more cleanup on the API, made the tree mor conformant. diff --git a/doc/xml.html b/doc/xml.html index 93d85ff3..5500349c 100644 --- a/doc/xml.html +++ b/doc/xml.html @@ -2,7 +2,7 @@ "http://www.w3.org/TR/REC-html40/loose.dtd"> <html> <head> -<title>No title</title> +<title>The XML library for Gnome</title> <meta name="GENERATOR" content="amaya V1.3b"> </head> <body bgcolor="#ffffff"> @@ -75,18 +75,22 @@ standalone=true content= too ELEMENT head ELEMENT title - content=Welcome to Gnome + TEXT + content=Welcome to Gnome ELEMENT chapter ELEMENT title - content=The Linux adventure + TEXT + content=The Linux adventure ELEMENT p - content=bla bla bla ... + TEXT + content=bla bla bla ... ELEMENT image ATTRIBUTE href TEXT content=linus.gif ELEMENT p - content=...</pre> + TEXT + content=...</pre> <p> This should be useful to learn the internal representation model.</p> @@ -106,19 +110,19 @@ Usually, the first thing to do is to read an XML input, the parser accepts to parse both memory mapped documents or direct files. 
The functions are defined in "parser.h":</p> <dl> -<dt>xmlDocPtr xmlParseMemory(char *buffer, int size);</dt> +<dt><code>xmlDocPtr xmlParseMemory(char *buffer, int size);</code></dt> <dd><p> parse a zero terminated string containing the document</p> </dd> </dl> <dl> -<dt>xmlDocPtr xmlParseFile(const char *filename);</dt> +<dt><code>xmlDocPtr xmlParseFile(const char *filename);</code></dt> <dd><p> parse an XML document contained in a file (possibly compressed)</p> </dd> </dl> <p> - This returns a pointer to the document structure (or NULL in case of +This returns a pointer to the document structure (or NULL in case of failure).</p> <p> A couple of comments can be made, first this mean that the parser is @@ -126,25 +130,156 @@ memory-hungry, first to load the document in memory, second to build the tree. Reading a document without building the tree will be possible in the future by pluggin the code to the SAX interface (see SAX.c).</p> +<h3>Building a tree from scratch</h3> +<p> +The other way to get an XML tree in memory is by building it. 
Basically there +is a set of functions dedicated to building new elements, those are also +described in "tree.h", here is for example the piece of code producing the +example used before:</p> +<pre> xmlDocPtr doc; + xmlNodePtr tree, subtree; + + doc = xmlNewDoc("1.0"); + doc->root = xmlNewDocNode(doc, NULL, "EXAMPLE", NULL); + xmlSetProp(doc->root, "prop1", "gnome is great"); + xmlSetProp(doc->root, "prop2", "&linux; too"); + tree = xmlNewChild(doc->root, NULL, "head", NULL); + subtree = xmlNewChild(tree, NULL, "title", "Welcome to Gnome"); + tree = xmlNewChild(doc->root, NULL, "chapter", NULL); + subtree = xmlNewChild(tree, NULL, "title", "The Linux adventure"); + subtree = xmlNewChild(tree, NULL, "p", "bla bla bla ..."); + subtree = xmlNewChild(tree, NULL, "image", NULL); + xmlSetProp(subtree, "href", "linus.gif");</pre> +<p> +Not really rocket science ...</p> + <h3>Traversing the tree</h3> <p> Basically by including "tree.h" your code has access to the internal structure of all the element of the tree. The names should be somewhat simple like <strong>parent</strong>, <strong>childs</strong>, <strong>next</strong>, -<strong>prev</strong>, <strong>properties</strong>, etc... </p> +<strong>prev</strong>, <strong>properties</strong>, etc... 
For example still +with the previous example:</p> +<pre><code>doc->root->childs->childs</code></pre> +<p> +points to the title element,</p> +<pre>doc->root->childs->next->childs->childs</pre> +<p> +points to the text node containing the chapter title "The Linux adventure" +and</p> +<pre>doc->root->properties->next->val</pre> +<p> +points to the entity reference containing the value of "&linux;" at the +beginning of the second attribute of the root element "EXAMPLE".</p> <h3>Modifying the tree</h3> +<p> +Functions are provided to read and write the document content:</p> +<dl> +<dt><code>xmlAttrPtr xmlSetProp(xmlNodePtr node, const CHAR *name, const CHAR +*value);</code></dt> +<dd><p> +This sets (or changes) an attribute carried by an ELEMENT node; the value can be +NULL</p> +</dd> +</dl> +<dl> +<dt><code>const CHAR *xmlGetProp(xmlNodePtr node, const CHAR +*name);</code></dt> +<dd><p> +This function returns a pointer to the property content, note that no extra +copy is made</p> +</dd> +</dl> +<p> +Two functions must be used to read and write the text associated to +elements:</p> +<dl> +<dt><code>xmlNodePtr xmlStringGetNodeList(xmlDocPtr doc, const CHAR +*value);</code></dt> +<dd><p> +This function takes an "external" string and converts it to one text node or +possibly to a list of entity and text nodes. All non-predefined entity +references like &Gnome; will be stored internally as an entity node, hence +the result of the function may not be a single node.</p> +</dd> +</dl> +<dl> +<dt><code>CHAR *xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int +inLine);</code></dt> +<dd><p> +This is the dual function, which generates a new string containing the content +of the text and entity nodes. Note the extra argument inLine, if set to 1 +instead of returning the &Gnome; XML encoding in the string it will +substitute it with its value say "GNU Network Object Model Environment".
Set +it if you want to use the string for non XML usage like User Interface.</p> +</dd> +</dl> <h3>Saving a tree</h3> +<p> +Basically 3 options are possible:</p> +<dl> +<dt><code>void xmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int +*size);</code></dt> +<dd><p> +returns a buffer where the document has been saved</p> +</dd> +</dl> +<dl> +<dt><code>extern void xmlDocDump(FILE *f, xmlDocPtr doc);</code></dt> +<dd><p> +dumps the document to an open file</p> +</dd> +</dl> +<dl> +<dt><code>int xmlSaveFile(const char *filename, xmlDocPtr cur);</code></dt> +<dd><p> +saves the document to a file. In that case the compression interface is +triggered if turned on</p> +</dd> +</dl> + +<h3>Compression</h3> +<p> +The library transparently handles compression when doing file based accesses, +the level of compression on saves can be tuned either globally or individually +for one file:</p> +<dl> +<dt><code>int xmlGetDocCompressMode (xmlDocPtr doc);</code></dt> +<dd><p> +Get the document compression ratio (0-9)</p> +</dd> +</dl> +<dl> +<dt><code>void xmlSetDocCompressMode (xmlDocPtr doc, int mode);</code></dt> +<dd><p> +Set the document compression ratio</p> +</dd> +</dl> +<dl> +<dt><code>int xmlGetCompressMode(void);</code></dt> +<dd><p> +Get the default compression ratio</p> +</dd> +</dl> +<dl> +<dt><code>void xmlSetCompressMode(int mode);</code></dt> +<dd><p> +set the default compression ratio</p> +</dd> +</dl> -<h2><a name="DOM">DOM interfaces</a></h2> +<h2><a name="DOM">DOM Principles</a></h2> <p> <a href="http://www.w3.org/DOM/">DOM</a> stands for the <em>Document Object Model</em> this is an API for accessing XML or HTML structured documents. Native support for DOM in Gnome is on the way (module gnome-dom), and it will -be based on gnome-xml. DOM defiles a set of IDL (or Java) interfaces allowing -to traverse and manipulate a document. The DOM library will allow accessing -and modifying "live" documents presents on other programs like this:</p> +be based on gnome-xml.
This will be a far cleaner interface to manipulate XML +files within Gnome since it won't expose the internal structure. DOM defiles a +set of IDL (or Java) interfaces allowing to traverse and manipulate a +document. The DOM library will allow accessing and modifying "live" documents +presents on other programs like this:</p> <p> <img src="DOM.gif" alt=" DOM.gif "></p> <p> @@ -138,7 +138,7 @@ void xmlInitializePredefinedEntities(void) { out = &value[0]; for (;(*out++ = (CHAR) *in);)in++; xmlAddEntity(xmlPredefinedEntities, (const CHAR *) &name[0], - XML_INTERNAL_GENERAL_ENTITY, NULL, NULL, + XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL, &value[0]); } } @@ -20,6 +20,7 @@ extern "C" { #define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3 #define XML_INTERNAL_PARAMETER_ENTITY 4 #define XML_EXTERNAL_PARAMETER_ENTITY 5 +#define XML_INTERNAL_PREDEFINED_ENTITY 6 /* * An unit of storage for an entity, contains the string, the value diff --git a/include/libxml/entities.h b/include/libxml/entities.h index 6f971c08..f6b03c3e 100644 --- a/include/libxml/entities.h +++ b/include/libxml/entities.h @@ -20,6 +20,7 @@ extern "C" { #define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY 3 #define XML_INTERNAL_PARAMETER_ENTITY 4 #define XML_EXTERNAL_PARAMETER_ENTITY 5 +#define XML_INTERNAL_PREDEFINED_ENTITY 6 /* * An unit of storage for an entity, contains the string, the value @@ -56,6 +56,42 @@ static CHAR buffer[] = \n\ "; +/************************************************************************ + * * + * Debug * + * * + ************************************************************************/ + +int treeTest(void) { + /* + * build a fake XML document + */ + xmlDocPtr doc; + xmlNodePtr tree, subtree; + + doc = xmlNewDoc("1.0"); + doc->root = xmlNewDocNode(doc, NULL, "EXAMPLE", NULL); + xmlSetProp(doc->root, "prop1", "gnome is great"); + xmlSetProp(doc->root, "prop2", "&linux; too"); + tree = xmlNewChild(doc->root, NULL, "head", NULL); + subtree = xmlNewChild(tree, NULL, "title", "Welcome to Gnome"); 
+ tree = xmlNewChild(doc->root, NULL, "chapter", NULL); + subtree = xmlNewChild(tree, NULL, "title", "The Linux adventure"); + subtree = xmlNewChild(tree, NULL, "p", "bla bla bla ..."); + subtree = xmlNewChild(tree, NULL, "image", NULL); + xmlSetProp(subtree, "href", "linus.gif"); + + /* + * print it. + */ + xmlDocDump(stdout, doc); + + /* + * free it. + */ + xmlFreeDoc(doc); + return(0); +} void parseAndPrintFile(char *filename) { xmlDocPtr doc; @@ -111,8 +147,12 @@ int main(int argc, char **argv) { else debug++; } - } else + } else { + printf("\nFirst test for the parser, with errors\n"); parseAndPrintBuffer(buffer); + printf("\nBuilding a tree from scratch and printing it\n"); + treeTest(); + } return(0); } @@ -372,22 +372,34 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const CHAR *value, int len) { CHAR *val; const CHAR *cur = value; const CHAR *q; + xmlEntityPtr ent; if (value == NULL) return(NULL); q = cur; while ((*cur != 0) && (cur - value < len)) { if (*cur == '&') { + /* + * Save the current text. 
+ */ if (cur != q) { - node = xmlNewDocTextLen(doc, q, cur - q); - if (node == NULL) return(ret); - if (last == NULL) - last = ret = node; - else { - last->next = node; - last = node; + if ((last != NULL) && (last->type == XML_TEXT_NODE)) { + xmlNodeAddContentLen(last, q, cur - q); + } else { + node = xmlNewDocTextLen(doc, q, cur - q); + if (node == NULL) return(ret); + if (last == NULL) + last = ret = node; + else { + last->next = node; + node->prev = last; + last = node; + } } } + /* + * Read the entity string + */ cur++; q = cur; while ((*cur != 0) && (cur - value < len) && (*cur != ';')) cur++; @@ -397,14 +409,32 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const CHAR *value, int len) { return(ret); } if (cur != q) { + /* + * Predefined entities don't generate nodes + */ val = xmlStrndup(q, cur - q); - node = xmlNewReference(doc, val); - if (node == NULL) return(ret); - if (last == NULL) - last = ret = node; - else { - last->next = node; - last = node; + ent = xmlGetDocEntity(doc, val); + if ((ent != NULL) && + (ent->type == XML_INTERNAL_PREDEFINED_ENTITY)) { + if (last == NULL) { + node = xmlNewDocText(doc, ent->content); + last = ret = node; + } else + xmlNodeAddContent(last, ent->content); + + } else { + /* + * Create a new REFERENCE_REF node + */ + node = xmlNewReference(doc, val); + if (node == NULL) return(ret); + if (last == NULL) + last = ret = node; + else { + last->next = node; + node->prev = last; + last = node; + } } free(val); } @@ -414,13 +444,21 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const CHAR *value, int len) { cur++; } if (cur != q) { - node = xmlNewDocTextLen(doc, q, cur - q); - if (node == NULL) return(ret); - if (last == NULL) - last = ret = node; - else { - last->next = node; - last = node; + /* + * Handle the last piece of text. 
+ */ + if ((last != NULL) && (last->type == XML_TEXT_NODE)) { + xmlNodeAddContentLen(last, q, cur - q); + } else { + node = xmlNewDocTextLen(doc, q, cur - q); + if (node == NULL) return(ret); + if (last == NULL) + last = ret = node; + else { + last->next = node; + node->prev = last; + last = node; + } } } return(ret); @@ -442,22 +480,34 @@ xmlStringGetNodeList(xmlDocPtr doc, const CHAR *value) { CHAR *val; const CHAR *cur = value; const CHAR *q; + xmlEntityPtr ent; if (value == NULL) return(NULL); q = cur; while (*cur != 0) { if (*cur == '&') { + /* + * Save the current text. + */ if (cur != q) { - node = xmlNewDocTextLen(doc, q, cur - q); - if (node == NULL) return(ret); - if (last == NULL) - last = ret = node; - else { - last->next = node; - last = node; + if ((last != NULL) && (last->type == XML_TEXT_NODE)) { + xmlNodeAddContentLen(last, q, cur - q); + } else { + node = xmlNewDocTextLen(doc, q, cur - q); + if (node == NULL) return(ret); + if (last == NULL) + last = ret = node; + else { + last->next = node; + node->prev = last; + last = node; + } } } + /* + * Read the entity string + */ cur++; q = cur; while ((*cur != 0) && (*cur != ';')) cur++; @@ -467,14 +517,33 @@ xmlStringGetNodeList(xmlDocPtr doc, const CHAR *value) { return(ret); } if (cur != q) { + /* + * Predefined entities don't generate nodes + */ val = xmlStrndup(q, cur - q); - node = xmlNewReference(doc, val); - if (node == NULL) return(ret); - if (last == NULL) - last = ret = node; - else { - last->next = node; - last = node; + ent = xmlGetDocEntity(doc, val); + if ((ent != NULL) && + (ent->type == XML_INTERNAL_PREDEFINED_ENTITY)) { + if (last == NULL) { + node = xmlNewDocText(doc, ent->content); + last = ret = node; + } else + xmlNodeAddContent(last, ent->content); + + } else { + /* + * Create a new REFERENCE_REF node + */ + val = xmlStrndup(q, cur - q); + node = xmlNewReference(doc, val); + if (node == NULL) return(ret); + if (last == NULL) + last = ret = node; + else { + last->next = node; + 
node->prev = last; + last = node; + } } free(val); } @@ -484,13 +553,21 @@ xmlStringGetNodeList(xmlDocPtr doc, const CHAR *value) { cur++; } if (cur != q) { - node = xmlNewDocTextLen(doc, q, cur - q); - if (node == NULL) return(ret); - if (last == NULL) - last = ret = node; - else { - last->next = node; - last = node; + /* + * Handle the last piece of text. + */ + if ((last != NULL) && (last->type == XML_TEXT_NODE)) { + xmlNodeAddContentLen(last, q, cur - q); + } else { + node = xmlNewDocTextLen(doc, q, cur - q); + if (node == NULL) return(ret); + if (last == NULL) + last = ret = node; + else { + last->next = node; + node->prev = last; + last = node; + } } } return(ret); @@ -2129,46 +2206,3 @@ xmlSaveFile(const char *filename, xmlDocPtr cur) { return(ret * sizeof(CHAR)); } -/************************************************************************ - * * - * Debug * - * * - ************************************************************************/ - -#ifdef STANDALONE -int main(void) { - xmlDocPtr doc; - xmlNodePtr tree, subtree; - xmlNsPtr ns1; - xmlNsPtr ns2; - - /* - * build a fake XML document - */ - doc = xmlNewDoc("1.0"); - ns1 = xmlNewNs(doc, "http://www.ietf.org/standards/dav/", "D"); - ns2 = xmlNewNs(doc, "http://www.w3.com/standards/z39.50/", "Z"); - doc->root = xmlNewDocNode(doc, ns1, "multistatus", NULL); - tree = xmlNewChild(doc->root, NULL, "response", NULL); - subtree = xmlNewChild(tree, NULL, "prop", NULL); - xmlNewChild(subtree, ns2, "Authors", NULL); - subtree = xmlNewChild(tree, NULL, "status", "HTTP/1.1 420 Method Failure"); - tree = xmlNewChild(doc->root, NULL, "response", NULL); - subtree = xmlNewChild(tree, NULL, "prop", NULL); - xmlNewChild(subtree, ns2, "Copyright-Owner", NULL); - subtree = xmlNewChild(tree, NULL, "status", "HTTP/1.1 409 Conflict"); - tree = xmlNewChild(doc->root, NULL, "responsedescription", - "Copyright Owner can not be deleted or altered"); - - /* - * print it. - */ - xmlDocDump(stdout, doc); - - /* - * free it. 
- */ - xmlFreeDoc(doc); - return(0); -} -#endif |