diff options
| author | Daniel Veillard <veillard@src.gnome.org> | 2003-02-05 10:17:38 +0000 |
|---|---|---|
| committer | Daniel Veillard <veillard@src.gnome.org> | 2003-02-05 10:17:38 +0000 |
| commit | 930dfb6324a11cf9eb6e00db6a374f9f8053ff02 (patch) | |
| tree | 8cd41e110025d0ea0c1ed6dc98ac25120ee52f6d /include | |
| parent | 4287c57a0e6493b986f0dfc8cd2ae0985ff9aa2d (diff) | |
| download | android_external_libxml2-930dfb6324a11cf9eb6e00db6a374f9f8053ff02.tar.gz android_external_libxml2-930dfb6324a11cf9eb6e00db6a374f9f8053ff02.tar.bz2 android_external_libxml2-930dfb6324a11cf9eb6e00db6a374f9f8053ff02.zip | |
applied HTML improvements from Nick Kew, allowing to do more checking to HTML elements and attributes.
* HTMLparser.c include/libxml/HTMLparser.h: applied HTML
improvements from Nick Kew, allowing to do more checking
to HTML elements and attributes.
Daniel
Diffstat (limited to 'include')
| -rw-r--r-- | include/libxml/HTMLparser.h | 44 |
1 files changed, 43 insertions, 1 deletions
```diff
diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h
index f0921371..c6d8899c 100644
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -28,7 +28,8 @@ typedef xmlDocPtr htmlDocPtr;
 typedef xmlNodePtr htmlNodePtr;
 
 /*
- * Internal description of an HTML element.
+ * Internal description of an HTML element, representing HTML 4.01
+ * and XHTML 1.0 (which share the same structure).
  */
 typedef struct _htmlElemDesc htmlElemDesc;
 typedef htmlElemDesc *htmlElemDescPtr;
@@ -42,6 +43,23 @@ struct _htmlElemDesc {
     char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
     char isinline; /* is this a block 0 or inline 1 element */
     const char *desc; /* the description */
+
+/* NRK Jan.2003
+ * New fields encapsulating HTML structure
+ *
+ * Bugs:
+ * This is a very limited representation. It fails to tell us when
+ * an element *requires* subelements (we only have whether they're
+ * allowed or not), and it doesn't tell us where CDATA and PCDATA
+ * are allowed. Some element relationships are not fully represented:
+ * these are flagged with the word MODIFIER
+ */
+    const char** subelts; /* allowed sub-elements of this element */
+    const char* defaultsubelt; /* subelement for suggested auto-repair
+                                  if necessary or NULL */
+    const char** attrs_opt; /* Optional Attributes */
+    const char** attrs_depr; /* Additional deprecated attributes */
+    const char** attrs_req; /* Required attributes */
 };
 
 /*
@@ -110,6 +128,30 @@ int htmlParseChunk (htmlParserCtxtPtr ctxt,
                     const char *chunk,
                     int size,
                     int terminate);
+
+/* NRK/Jan2003: further knowledge of HTML structure
+ */
+typedef enum {
+  HTML_NA = 0 , /* something we don't check at all */
+  HTML_INVALID = 0x1 ,
+  HTML_DEPRECATED = 0x2 ,
+  HTML_VALID = 0x4 ,
+  HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */
+} htmlStatus ;
+
+/* Using htmlElemDesc rather than name here, to emphasise the fact
+   that otherwise there's a lookup overhead
+*/
+htmlStatus htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
+int htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
+htmlStatus htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
+htmlStatus htmlNodeStatus(const htmlNodePtr, int) ;
+#define htmlDefaultSubelement(elt) elt->defaultsubelt
+#define htmlElementAllowedHereDesc(parent,elt) \
+        htmlElementAllowedHere((parent), (elt)->name)
+#define htmlRequiredAttrs(elt) (elt)->attrs_req
+
+
 #ifdef __cplusplus
 }
 #endif
```
