aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog14
-rw-r--r--SAX.c19
-rw-r--r--TODO5
-rw-r--r--debugXML.c126
-rw-r--r--encoding.c197
-rw-r--r--encoding.h34
-rw-r--r--entities.c304
-rw-r--r--entities.h2
-rw-r--r--include/libxml/encoding.h34
-rw-r--r--include/libxml/entities.h2
-rw-r--r--include/libxml/parser.h2
-rw-r--r--include/libxml/parserInternals.h6
-rw-r--r--include/libxml/valid.h1
-rw-r--r--parser.c96
-rw-r--r--parser.h2
-rw-r--r--parserInternals.h6
-rw-r--r--testHTML.c3
-rw-r--r--uri.c5
-rw-r--r--valid.c80
-rw-r--r--valid.h1
-rw-r--r--xmllint.c19
21 files changed, 778 insertions, 180 deletions
diff --git a/ChangeLog b/ChangeLog
index 42250694..8a55c1b5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+Sat Aug 26 23:31:04 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
+
+ * doc/encoding.html: added encoding aliases doc
+ * doc/xml.html: updates
+ * encoding.[ch]: added EncodingAliases functions
+ * entities.[ch] valid.[ch] debugXML.c: removed two serious
+ bottlenecks affecting large DTDs like Docbook
+ * parser.[ch] xmllint.c: added a pedantic option, will be
+ useful
+ * SAX.c: redefinition of entities is reported in pedantic mode
+ * testHTML.c: uninitialized warning from gcc
+ * uri.c: fixed a couple of bugs
+ * TODO: added issue raised by Michael
+
Wed Aug 23 01:50:51 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* doc/encoding.html: propagated Martin Duerst suggestions
diff --git a/SAX.c b/SAX.c
index fb5e741c..43e847db 100644
--- a/SAX.c
+++ b/SAX.c
@@ -395,19 +395,28 @@ void
entityDecl(void *ctx, const xmlChar *name, int type,
const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
{
+ xmlEntityPtr ent;
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
name, type, publicId, systemId, content);
#endif
- if (ctxt->inSubset == 1)
- xmlAddDocEntity(ctxt->myDoc, name, type, publicId,
+ if (ctxt->inSubset == 1) {
+ ent = xmlAddDocEntity(ctxt->myDoc, name, type, publicId,
systemId, content);
- else if (ctxt->inSubset == 2)
- xmlAddDtdEntity(ctxt->myDoc, name, type, publicId,
+ if ((ent == NULL) && (ctxt->pedantic) &&
+ (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
+ ctxt->sax->warning(ctxt,
+ "Entity(%s) already defined in the internal subset\n", name);
+ } else if (ctxt->inSubset == 2) {
+ ent = xmlAddDtdEntity(ctxt->myDoc, name, type, publicId,
systemId, content);
- else {
+ if ((ent == NULL) && (ctxt->pedantic) &&
+ (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
+ ctxt->sax->warning(ctxt,
+ "Entity(%s) already defined in the external subset\n", name);
+ } else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt,
"SAX.entityDecl(%s) called while not in subset\n", name);
diff --git a/TODO b/TODO
index ee328a84..246eb2da 100644
--- a/TODO
+++ b/TODO
@@ -25,6 +25,11 @@ TODO:
issue a warning
- fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA
not WITHOUT_CORBA flag
+- reported by Michael, update of doc node when pasting on a new document
+ There can be far more than simply the doc pointer which refers to
+ the old document, for example namespace declarations or entities
+ references can also be a nasty problem, far more than updating the
+ doc values.
TODO:
=====
diff --git a/debugXML.c b/debugXML.c
index 5b211102..7c00fb18 100644
--- a/debugXML.c
+++ b/debugXML.c
@@ -760,38 +760,41 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
xmlEntitiesTablePtr table = (xmlEntitiesTablePtr)
doc->intSubset->entities;
fprintf(output, "Entities in internal subset\n");
- for (i = 0;i < table->nb_entities;i++) {
+ for (i = 0;i < table->max_entities;i++) {
cur = table->table[i];
- fprintf(output, "%d : %s : ", i, cur->name);
- switch (cur->etype) {
- case XML_INTERNAL_GENERAL_ENTITY:
- fprintf(output, "INTERNAL GENERAL, ");
- break;
- case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
- fprintf(output, "EXTERNAL PARSED, ");
- break;
- case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
- fprintf(output, "EXTERNAL UNPARSED, ");
- break;
- case XML_INTERNAL_PARAMETER_ENTITY:
- fprintf(output, "INTERNAL PARAMETER, ");
- break;
- case XML_EXTERNAL_PARAMETER_ENTITY:
- fprintf(output, "EXTERNAL PARAMETER, ");
- break;
- default:
- fprintf(output, "UNKNOWN TYPE %d",
- cur->etype);
+ while (cur != NULL) {
+ fprintf(output, "%d : %s : ", i, cur->name);
+ switch (cur->etype) {
+ case XML_INTERNAL_GENERAL_ENTITY:
+ fprintf(output, "INTERNAL GENERAL, ");
+ break;
+ case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
+ fprintf(output, "EXTERNAL PARSED, ");
+ break;
+ case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+ fprintf(output, "EXTERNAL UNPARSED, ");
+ break;
+ case XML_INTERNAL_PARAMETER_ENTITY:
+ fprintf(output, "INTERNAL PARAMETER, ");
+ break;
+ case XML_EXTERNAL_PARAMETER_ENTITY:
+ fprintf(output, "EXTERNAL PARAMETER, ");
+ break;
+ default:
+ fprintf(output, "UNKNOWN TYPE %d",
+ cur->etype);
+ }
+ if (cur->ExternalID != NULL)
+ fprintf(output, "ID \"%s\"", cur->ExternalID);
+ if (cur->SystemID != NULL)
+ fprintf(output, "SYSTEM \"%s\"", cur->SystemID);
+ if (cur->orig != NULL)
+ fprintf(output, "\n orig \"%s\"", cur->orig);
+ if (cur->content != NULL)
+ fprintf(output, "\n content \"%s\"", cur->content);
+ fprintf(output, "\n");
+ cur = cur->nexte;
}
- if (cur->ExternalID != NULL)
- fprintf(output, "ID \"%s\"", cur->ExternalID);
- if (cur->SystemID != NULL)
- fprintf(output, "SYSTEM \"%s\"", cur->SystemID);
- if (cur->orig != NULL)
- fprintf(output, "\n orig \"%s\"", cur->orig);
- if (cur->content != NULL)
- fprintf(output, "\n content \"%s\"", cur->content);
- fprintf(output, "\n");
}
} else
fprintf(output, "No entities in internal subset\n");
@@ -799,38 +802,41 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
xmlEntitiesTablePtr table = (xmlEntitiesTablePtr)
doc->extSubset->entities;
fprintf(output, "Entities in external subset\n");
- for (i = 0;i < table->nb_entities;i++) {
+ for (i = 0;i < table->max_entities;i++) {
cur = table->table[i];
- fprintf(output, "%d : %s : ", i, cur->name);
- switch (cur->etype) {
- case XML_INTERNAL_GENERAL_ENTITY:
- fprintf(output, "INTERNAL GENERAL, ");
- break;
- case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
- fprintf(output, "EXTERNAL PARSED, ");
- break;
- case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
- fprintf(output, "EXTERNAL UNPARSED, ");
- break;
- case XML_INTERNAL_PARAMETER_ENTITY:
- fprintf(output, "INTERNAL PARAMETER, ");
- break;
- case XML_EXTERNAL_PARAMETER_ENTITY:
- fprintf(output, "EXTERNAL PARAMETER, ");
- break;
- default:
- fprintf(output, "UNKNOWN TYPE %d",
- cur->etype);
+ while (cur != NULL) {
+ fprintf(output, "%d : %s : ", i, cur->name);
+ switch (cur->etype) {
+ case XML_INTERNAL_GENERAL_ENTITY:
+ fprintf(output, "INTERNAL GENERAL, ");
+ break;
+ case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
+ fprintf(output, "EXTERNAL PARSED, ");
+ break;
+ case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+ fprintf(output, "EXTERNAL UNPARSED, ");
+ break;
+ case XML_INTERNAL_PARAMETER_ENTITY:
+ fprintf(output, "INTERNAL PARAMETER, ");
+ break;
+ case XML_EXTERNAL_PARAMETER_ENTITY:
+ fprintf(output, "EXTERNAL PARAMETER, ");
+ break;
+ default:
+ fprintf(output, "UNKNOWN TYPE %d",
+ cur->etype);
+ }
+ if (cur->ExternalID != NULL)
+ fprintf(output, "ID \"%s\"", cur->ExternalID);
+ if (cur->SystemID != NULL)
+ fprintf(output, "SYSTEM \"%s\"", cur->SystemID);
+ if (cur->orig != NULL)
+ fprintf(output, "\n orig \"%s\"", cur->orig);
+ if (cur->content != NULL)
+ fprintf(output, "\n content \"%s\"", cur->content);
+ fprintf(output, "\n");
+ cur = cur->nexte;
}
- if (cur->ExternalID != NULL)
- fprintf(output, "ID \"%s\"", cur->ExternalID);
- if (cur->SystemID != NULL)
- fprintf(output, "SYSTEM \"%s\"", cur->SystemID);
- if (cur->orig != NULL)
- fprintf(output, "\n orig \"%s\"", cur->orig);
- if (cur->content != NULL)
- fprintf(output, "\n content \"%s\"", cur->content);
- fprintf(output, "\n");
}
} else
fprintf(output, "No entities in external subset\n");
diff --git a/encoding.c b/encoding.c
index 3031ce8c..99818900 100644
--- a/encoding.c
+++ b/encoding.c
@@ -50,6 +50,17 @@
xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
+/*
+ * One registered encoding alias: associates an alias string with the
+ * encoding name it stands for.
+ */
+typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
+typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
+struct _xmlCharEncodingAlias {
+ /* encoding name the alias resolves to (copy made by xmlAddEncodingAlias) */
+ const char *name;
+ /* alias string, stored upper-cased by xmlAddEncodingAlias */
+ const char *alias;
+};
+
+/*
+ * The alias table itself, the number of entries currently in use and the
+ * number of entries the allocation can hold.
+ */
+static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
+static int xmlCharEncodingAliasesNb = 0;
+static int xmlCharEncodingAliasesMax = 0;
+
#ifdef LIBXML_ICONV_ENABLED
#if 0
#define DEBUG_ENCODING /* Define this to get encoding traces */
@@ -906,6 +917,157 @@ xmlDetectCharEncoding(const unsigned char* in, int len)
}
/**
+ * xmlCleanupEncodingAliases:
+ *
+ * Unregisters all aliases and releases the alias table.
+ * The table pointer is reset to NULL so that the lookup functions see an
+ * empty registry and a later registration starts from a fresh allocation
+ * instead of touching freed memory.
+ */
+void
+xmlCleanupEncodingAliases(void) {
+    int i;
+
+    if (xmlCharEncodingAliases == NULL)
+        return;
+
+    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
+        if (xmlCharEncodingAliases[i].name != NULL)
+            xmlFree((char *) xmlCharEncodingAliases[i].name);
+        if (xmlCharEncodingAliases[i].alias != NULL)
+            xmlFree((char *) xmlCharEncodingAliases[i].alias);
+    }
+    xmlCharEncodingAliasesNb = 0;
+    xmlCharEncodingAliasesMax = 0;
+    xmlFree(xmlCharEncodingAliases);
+    /* do not leave a dangling pointer behind for the next Add/Get/Del call */
+    xmlCharEncodingAliases = NULL;
+}
+
+/**
+ * xmlGetEncodingAlias:
+ * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
+ *
+ * Lookup an encoding name for the given alias. The comparison is done on
+ * the upper-cased alias, truncated to 99 characters.
+ *
+ * Returns NULL if not found, otherwise the original encoding name
+ */
+const char *
+xmlGetEncodingAlias(const char *alias) {
+    int i;
+    char upper[100];
+
+    if (alias == NULL)
+        return(NULL);
+
+    if (xmlCharEncodingAliases == NULL)
+        return(NULL);
+
+    for (i = 0;i < 99;i++) {
+        /*
+         * toupper() is only defined for EOF and values representable as
+         * unsigned char; a plain char holding a byte >= 0x80 may be negative.
+         */
+        upper[i] = (char) toupper((unsigned char) alias[i]);
+        if (upper[i] == 0) break;
+    }
+    upper[i] = 0;
+
+    /*
+     * Walk down the list looking for a definition of the alias
+     */
+    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
+        if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
+            return(xmlCharEncodingAliases[i].name);
+        }
+    }
+    return(NULL);
+}
+
+/**
+ * xmlAddEncodingAlias:
+ * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
+ * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
+ *
+ * Registers an alias @alias for an encoding named @name. The alias is
+ * stored upper-cased (truncated to 99 characters). An existing alias
+ * will be overwritten.
+ *
+ * Returns 0 in case of success, -1 in case of error (NULL argument or
+ * memory allocation failure; the registry is left unchanged in that case)
+ */
+int
+xmlAddEncodingAlias(const char *name, const char *alias) {
+    int i;
+    char upper[100];
+    char *nameCopy;
+    char *aliasCopy;
+    xmlCharEncodingAliasPtr table;
+
+    if ((name == NULL) || (alias == NULL))
+        return(-1);
+
+    for (i = 0;i < 99;i++) {
+        /* toupper() needs a value representable as unsigned char */
+        upper[i] = (char) toupper((unsigned char) alias[i]);
+        if (upper[i] == 0) break;
+    }
+    upper[i] = 0;
+
+    /*
+     * Walk down the list looking for a definition of the alias; replacing
+     * an entry never requires growing the table, so do it first.
+     */
+    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
+        if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
+            /*
+             * Replace the definition. Duplicate before freeing so that a
+             * failed allocation keeps the previous definition intact.
+             */
+            nameCopy = xmlMemStrdup(name);
+            if (nameCopy == NULL)
+                return(-1);
+            xmlFree((char *) xmlCharEncodingAliases[i].name);
+            xmlCharEncodingAliases[i].name = nameCopy;
+            return(0);
+        }
+    }
+
+    /*
+     * Make room for one more entry. A capacity of zero means there is no
+     * live table (initial state, or the counters were reset by
+     * xmlCleanupEncodingAliases), so start from a fresh allocation.
+     */
+    if ((xmlCharEncodingAliases == NULL) ||
+        (xmlCharEncodingAliasesMax <= 0)) {
+        table = (xmlCharEncodingAliasPtr)
+            xmlMalloc(20 * sizeof(xmlCharEncodingAlias));
+        if (table == NULL)
+            return(-1);
+        xmlCharEncodingAliases = table;
+        xmlCharEncodingAliasesNb = 0;
+        xmlCharEncodingAliasesMax = 20;
+    } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
+        /*
+         * Keep the old pointer until the reallocation is known to have
+         * succeeded, otherwise the existing table would be leaked.
+         */
+        table = (xmlCharEncodingAliasPtr)
+            xmlRealloc(xmlCharEncodingAliases,
+                2 * xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
+        if (table == NULL)
+            return(-1);
+        xmlCharEncodingAliases = table;
+        xmlCharEncodingAliasesMax *= 2;
+    }
+
+    /*
+     * Add the definition, only once both copies are available so the
+     * table never holds a half-initialized entry.
+     */
+    nameCopy = xmlMemStrdup(name);
+    aliasCopy = xmlMemStrdup(upper);
+    if ((nameCopy == NULL) || (aliasCopy == NULL)) {
+        if (nameCopy != NULL)
+            xmlFree(nameCopy);
+        if (aliasCopy != NULL)
+            xmlFree(aliasCopy);
+        return(-1);
+    }
+    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = nameCopy;
+    xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = aliasCopy;
+    xmlCharEncodingAliasesNb++;
+    return(0);
+}
+
+/**
+ * xmlDelEncodingAlias:
+ * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
+ *
+ * Unregisters an encoding alias @alias. As for registration and lookup,
+ * the alias is matched in its upper-cased form (truncated to 99
+ * characters), so the string used to register an alias also removes it.
+ *
+ * Returns 0 in case of success, -1 in case of error
+ */
+int
+xmlDelEncodingAlias(const char *alias) {
+    int i;
+    char upper[100];
+
+    if (alias == NULL)
+        return(-1);
+
+    if (xmlCharEncodingAliases == NULL)
+        return(-1);
+
+    /* aliases are stored upper-cased, normalize the key the same way */
+    for (i = 0;i < 99;i++) {
+        upper[i] = (char) toupper((unsigned char) alias[i]);
+        if (upper[i] == 0) break;
+    }
+    upper[i] = 0;
+
+    /*
+     * Walk down the list looking for a definition of the alias
+     */
+    for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
+        if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
+            xmlFree((char *) xmlCharEncodingAliases[i].name);
+            xmlFree((char *) xmlCharEncodingAliases[i].alias);
+            xmlCharEncodingAliasesNb--;
+            /* close the gap by shifting the following entries down by one */
+            memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
+                    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
+            return(0);
+        }
+    }
+    return(-1);
+}
+
+/**
* xmlParseCharEncoding:
* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
*
@@ -919,9 +1081,20 @@ xmlDetectCharEncoding(const unsigned char* in, int len)
xmlCharEncoding
xmlParseCharEncoding(const char* name)
{
+ const char *alias;
char upper[500];
int i;
+ if (name == NULL)
+ return(XML_CHAR_ENCODING_NONE);
+
+ /*
+ * Do the alias resolution
+ */
+ alias = xmlGetEncodingAlias(name);
+ if (alias != NULL)
+ name = alias;
+
for (i = 0;i < 499;i++) {
upper[i] = toupper(name[i]);
if (upper[i] == 0) break;
@@ -1076,11 +1249,19 @@ xmlNewCharEncodingHandler(const char *name,
xmlCharEncodingInputFunc input,
xmlCharEncodingOutputFunc output) {
xmlCharEncodingHandlerPtr handler;
+ const char *alias;
char upper[500];
int i;
char *up = 0;
/*
+ * Do the alias resolution
+ */
+ alias = xmlGetEncodingAlias(name);
+ if (alias != NULL)
+ name = alias;
+
+ /*
* Keep only the uppercase version of the encoding.
*/
if (name == NULL) {
@@ -1168,10 +1349,12 @@ xmlInitCharEncodingHandlers(void) {
* xmlCleanupCharEncodingHandlers:
*
* Cleanup the memory allocated for the char encoding support, it
- * unregisters all the encoding handlers.
+ * unregisters all the encoding handlers and the aliases.
*/
void
xmlCleanupCharEncodingHandlers(void) {
+ xmlCleanupEncodingAliases();
+
if (handlers == NULL) return;
for (;nbCharEncodingHandler > 0;) {
@@ -1350,6 +1533,8 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) {
*/
xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char *name) {
+ const char *nalias;
+ const char *norig;
xmlCharEncoding alias;
#ifdef LIBXML_ICONV_ENABLED
xmlCharEncodingHandlerPtr enc;
@@ -1363,6 +1548,14 @@ xmlFindCharEncodingHandler(const char *name) {
if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
/*
+ * Do the alias resolution
+ */
+ norig = name;
+ nalias = xmlGetEncodingAlias(name);
+ if (nalias != NULL)
+ name = nalias;
+
+ /*
* Check first for directly registered encoding names
*/
for (i = 0;i < 99;i++) {
@@ -1412,7 +1605,7 @@ xmlFindCharEncodingHandler(const char *name) {
/*
* Fallback using the canonical names
*/
- alias = xmlParseCharEncoding(name);
+ alias = xmlParseCharEncoding(norig);
if (alias != XML_CHAR_ENCODING_ERROR) {
const char* canon;
canon = xmlGetCharEncodingName(alias);
diff --git a/encoding.h b/encoding.h
index 5b6af9fa..62e81e3d 100644
--- a/encoding.h
+++ b/encoding.h
@@ -133,15 +133,39 @@ struct _xmlCharEncodingHandler {
#endif /* LIBXML_ICONV_ENABLED */
};
+/*
+ * Interfaces for encoding handlers
+ */
void xmlInitCharEncodingHandlers (void);
void xmlCleanupCharEncodingHandlers (void);
void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);
-xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in,
+xmlCharEncodingHandlerPtr
+ xmlGetCharEncodingHandler (xmlCharEncoding enc);
+xmlCharEncodingHandlerPtr
+ xmlFindCharEncodingHandler (const char *name);
+
+
+/*
+ * Interfaces for encoding names and aliases
+ */
+int xmlAddEncodingAlias (const char *name,
+ const char *alias);
+int xmlDelEncodingAlias (const char *alias);
+const char *
+ xmlGetEncodingAlias (const char *alias);
+void xmlCleanupEncodingAliases (void);
+xmlCharEncoding
+ xmlParseCharEncoding (const char* name);
+const char*
+ xmlGetCharEncodingName (xmlCharEncoding enc);
+
+/*
+ * Interfaces directly used by the parsers.
+ */
+xmlCharEncoding
+ xmlDetectCharEncoding (const unsigned char* in,
int len);
-xmlCharEncoding xmlParseCharEncoding (const char* name);
-const char* xmlGetCharEncodingName (xmlCharEncoding enc);
-xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
-xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
+
int xmlCheckUTF8 (const unsigned char *utf);
int xmlCharEncOutFunc (xmlCharEncodingHandler *handler,
diff --git a/entities.c b/entities.c
index 75059433..1f960551 100644
--- a/entities.c
+++ b/entities.c
@@ -22,6 +22,23 @@
#include <libxml/parser.h>
#define DEBUG_ENT_REF /* debugging of cross entities dependancies */
+#define ENTITY_HASH_SIZE 256 /* modify xmlEntityComputeHash accordingly */
+
+/*
+ * xmlEntityComputeHash:
+ *
+ * Computes the hash bucket for an entity name: the sum of the bytes of
+ * the name, accumulated in an unsigned char so that it wraps around.
+ * A NULL name hashes to 0.
+ */
+int
+xmlEntityComputeHash(const xmlChar *name) {
+    const unsigned char *p;
+    unsigned char sum = 0;
+
+    if (name != NULL) {
+        for (p = (const unsigned char *) name; *p != 0; p++)
+            sum = (unsigned char) (sum + *p);
+    }
+    return((int) sum);
+}
/*
* The XML predefined entities.
@@ -39,6 +56,10 @@ struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
{ "amp", "&" }
};
+/*
+ * TODO: !!!!!!! This is GROSS, allocation of a 256 entry hash for
+ * a fixed number of 4 elements !
+ */
xmlEntitiesTablePtr xmlPredefinedEntities = NULL;
/*
@@ -77,10 +98,41 @@ void xmlFreeEntity(xmlEntityPtr entity) {
*/
static xmlEntityPtr
xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
- const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) {
+ const xmlChar *ExternalID, const xmlChar *SystemID,
+ const xmlChar *content) {
+#ifndef ENTITY_HASH_SIZE
int i;
+#endif
+ int hash;
xmlEntityPtr ret;
+ if (name == NULL)
+ return(NULL);
+#ifdef ENTITY_HASH_SIZE
+ hash = xmlEntityComputeHash(name);
+ ret = table->table[hash];
+ while (ret != NULL) {
+ if (!xmlStrcmp(ret->name, name)) {
+ /*
+ * The entity is already defined in this Dtd, the spec says to NOT
+ * override it ... Is it worth a Warning ??? !!!
+ * Not having a cprinting context this seems hard ...
+ */
+ if (((type == XML_INTERNAL_PARAMETER_ENTITY) ||
+ (type == XML_EXTERNAL_PARAMETER_ENTITY)) &&
+ ((ret->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
+ (ret->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
+ return(NULL);
+ else
+ if (((type != XML_INTERNAL_PARAMETER_ENTITY) &&
+ (type != XML_EXTERNAL_PARAMETER_ENTITY)) &&
+ ((ret->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
+ (ret->etype != XML_EXTERNAL_PARAMETER_ENTITY)))
+ return(NULL);
+ }
+ ret = ret->nexte;
+ }
+#else
for (i = 0;i < table->nb_entities;i++) {
ret = table->table[i];
if (!xmlStrcmp(ret->name, name)) {
@@ -115,6 +167,7 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
return(NULL);
}
}
+#endif
ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
if (ret == NULL) {
fprintf(stderr, "xmlAddEntity: out of memory\n");
@@ -122,7 +175,12 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
}
memset(ret, 0, sizeof(xmlEntity));
ret->type = XML_ENTITY_DECL;
+#ifdef ENTITY_HASH_SIZE
+ ret->nexte = table->table[hash];
+ table->table[hash] = ret;
+#else
table->table[table->nb_entities] = ret;
+#endif
/*
* fill the structure.
@@ -202,10 +260,20 @@ xmlGetPredefinedEntity(const xmlChar *name) {
if (xmlPredefinedEntities == NULL)
xmlInitializePredefinedEntities();
+#ifdef ENTITY_HASH_SIZE
+ i = xmlEntityComputeHash(name);
+ cur = xmlPredefinedEntities->table[i];
+ while (cur != NULL) {
+ if (!xmlStrcmp(cur->name, name))
+ return(cur);
+ cur = cur->nexte;
+ }
+#else
for (i = 0;i < xmlPredefinedEntities->nb_entities;i++) {
cur = xmlPredefinedEntities->table[i];
if (!xmlStrcmp(cur->name, name)) return(cur);
}
+#endif
return(NULL);
}
@@ -455,6 +523,58 @@ xmlEntityAddReference(xmlEntityPtr ent, const xmlChar *to) {
}
#endif
+
+/**
+ * xmlGetEntityFromTable:
+ * @table:  an entity table
+ * @name:  the entity name
+ * @parameter:  look for parameter entities
+ *
+ * Do an entity lookup in the table. When @parameter is non-zero only
+ * parameter entities are considered, otherwise only the other (general)
+ * entities are; the two kinds may share a name.
+ *
+ * Returns A pointer to the entity structure or NULL if not found.
+ */
+xmlEntityPtr
+xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name,
+                      int parameter) {
+    xmlEntityPtr cur;
+#ifdef ENTITY_HASH_SIZE
+    int hash;
+
+    hash = xmlEntityComputeHash(name);
+    cur = table->table[hash];
+    while (cur != NULL) {
+        switch (cur->etype) {
+            case XML_INTERNAL_PARAMETER_ENTITY:
+            case XML_EXTERNAL_PARAMETER_ENTITY:
+                if ((parameter) && (!xmlStrcmp(cur->name, name)))
+                    return(cur);
+                /*
+                 * must not fall through: a parameter entity is never an
+                 * answer to a general entity lookup
+                 */
+                break;
+            default:
+                if ((!parameter) && (!xmlStrcmp(cur->name, name)))
+                    return(cur);
+                break;
+        }
+        cur = cur->nexte;
+    }
+#else
+    int i;
+
+    for (i = 0;i < table->nb_entities;i++) {
+        cur = table->table[i];
+        switch (cur->etype) {
+            case XML_INTERNAL_PARAMETER_ENTITY:
+            case XML_EXTERNAL_PARAMETER_ENTITY:
+                if ((parameter) && (!xmlStrcmp(cur->name, name)))
+                    return(cur);
+                /* same as above, keep the two kinds of entities apart */
+                break;
+            default:
+                if ((!parameter) && (!xmlStrcmp(cur->name, name)))
+                    return(cur);
+                break;
+        }
+    }
+#endif
+    return(NULL);
+}
+
/**
* xmlGetParameterEntity:
* @doc: the document referencing the entity
@@ -467,36 +587,18 @@ xmlEntityAddReference(xmlEntityPtr ent, const xmlChar *to) {
*/
xmlEntityPtr
xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
- int i;
- xmlEntityPtr cur;
xmlEntitiesTablePtr table;
+ xmlEntityPtr ret;
if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->intSubset->entities;
- for (i = 0;i < table->nb_entities;i++) {
- cur = table->table[i];
- if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
- (cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) &&
- (!xmlStrcmp(cur->name, name))) return(cur);
- }
- }
- if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
- table = (xmlEntitiesTablePtr) doc->extSubset->entities;
- for (i = 0;i < table->nb_entities;i++) {
- cur = table->table[i];
- if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
- (cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) &&
- (!xmlStrcmp(cur->name, name))) return(cur);
- }
+ ret = xmlGetEntityFromTable(table, name, 1);
+ if (ret != NULL)
+ return(ret);
}
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities;
- for (i = 0;i < table->nb_entities;i++) {
- cur = table->table[i];
- if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
- (cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) &&
- (!xmlStrcmp(cur->name, name))) return(cur);
- }
+ return(xmlGetEntityFromTable(table, name, 1));
}
return(NULL);
}
@@ -513,18 +615,11 @@ xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
*/
xmlEntityPtr
xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
- int i;
- xmlEntityPtr cur;
xmlEntitiesTablePtr table;
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities;
- for (i = 0;i < table->nb_entities;i++) {
- cur = table->table[i];
- if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
- (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
- (!xmlStrcmp(cur->name, name))) return(cur);
- }
+ return(xmlGetEntityFromTable(table, name, 0));
}
return(NULL);
}
@@ -542,39 +637,25 @@ xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
*/
xmlEntityPtr
xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
- int i;
xmlEntityPtr cur;
xmlEntitiesTablePtr table;
if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->intSubset->entities;
- for (i = 0;i < table->nb_entities;i++) {
- cur = table->table[i];
- if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
- (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
- (!xmlStrcmp(cur->name, name))) return(cur);
- }
+ cur = xmlGetEntityFromTable(table, name, 0);
+ if (cur != NULL)
+ return(cur);
}
if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
table = (xmlEntitiesTablePtr) doc->extSubset->entities;
- for (i = 0;i < table->nb_entities;i++) {
- cur = table->table[i];
- if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
- (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
- (!xmlStrcmp(cur->name, name))) return(cur);
- }
+ cur = xmlGetEntityFromTable(table, name, 0);
+ if (cur != NULL)
+ return(cur);
}
if (xmlPredefinedEntities == NULL)
xmlInitializePredefinedEntities();
table = xmlPredefinedEntities;
- for (i = 0;i < table->nb_entities;i++) {
- cur = table->table[i];
- if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
- (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
- (!xmlStrcmp(cur->name, name))) return(cur);
- }
-
- return(NULL);
+ return(xmlGetEntityFromTable(table, name, 0));
}
/*
@@ -1029,8 +1110,9 @@ xmlCreateEntitiesTable(void) {
(long)sizeof(xmlEntitiesTable));
return(NULL);
}
- ret->max_entities = XML_MIN_ENTITIES_TABLE;
ret->nb_entities = 0;
+#ifdef ENTITY_HASH_SIZE
+ ret->max_entities = ENTITY_HASH_SIZE;
ret->table = (xmlEntityPtr *)
xmlMalloc(ret->max_entities * sizeof(xmlEntityPtr));
if (ret == NULL) {
@@ -1039,6 +1121,18 @@ xmlCreateEntitiesTable(void) {
xmlFree(ret);
return(NULL);
}
+ memset(ret->table, 0, ret->max_entities * sizeof(xmlEntityPtr));
+#else
+ ret->max_entities = XML_MIN_ENTITIES_TABLE;
+ ret->table = (xmlEntityPtr *)
+ xmlMalloc(ret->max_entities * sizeof(xmlEntityPtr));
+ if (ret == NULL) {
+ fprintf(stderr, "xmlCreateEntitiesTable : xmlMalloc(%ld) failed\n",
+ ret->max_entities * (long)sizeof(xmlEntityPtr));
+ xmlFree(ret);
+ return(NULL);
+ }
+#endif
return(ret);
}
@@ -1051,17 +1145,65 @@ xmlCreateEntitiesTable(void) {
void
xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
int i;
+#ifdef ENTITY_HASH_SIZE
+ xmlEntityPtr cur, next;
+#endif
if (table == NULL) return;
+#ifdef ENTITY_HASH_SIZE
+ for (i = 0;i < ENTITY_HASH_SIZE;i++) {
+ cur = table->table[i];
+ while (cur != NULL) {
+ next = cur->nexte;
+ xmlFreeEntity(cur);
+ cur = next;
+ }
+ }
+#else
for (i = 0;i < table->nb_entities;i++) {
xmlFreeEntity(table->table[i]);
}
+#endif
xmlFree(table->table);
xmlFree(table);
}
/**
+ * xmlCopyEntity:
+ * @ent:  An entity
+ *
+ * Build a copy of an entity: the entity type and the name, identifiers,
+ * content and original value strings are duplicated. The copy is not
+ * linked into any table (its nexte pointer is NULL).
+ *
+ * Returns the new xmlEntityPtr or NULL in case of error.
+ */
+xmlEntityPtr
+xmlCopyEntity(xmlEntityPtr ent) {
+    xmlEntityPtr cur;
+
+    cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
+    if (cur == NULL) {
+        fprintf(stderr, "xmlCopyEntity: out of memory !\n");
+        return(NULL);
+    }
+    memset(cur, 0, sizeof(xmlEntity));
+    /* an entity is an entity declaration node, same tag as xmlAddEntity */
+    cur->type = XML_ENTITY_DECL;
+
+    cur->etype = ent->etype;
+    if (ent->name != NULL)
+        cur->name = xmlStrdup(ent->name);
+    if (ent->ExternalID != NULL)
+        cur->ExternalID = xmlStrdup(ent->ExternalID);
+    if (ent->SystemID != NULL)
+        cur->SystemID = xmlStrdup(ent->SystemID);
+    if (ent->content != NULL)
+        cur->content = xmlStrdup(ent->content);
+    if (ent->orig != NULL)
+        cur->orig = xmlStrdup(ent->orig);
+    return(cur);
+}
+
+/**
* xmlCopyEntitiesTable:
* @table: An entity table
*
@@ -1080,6 +1222,15 @@ xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n");
return(NULL);
}
+#ifdef ENTITY_HASH_SIZE
+ ret->table = (xmlEntityPtr *) xmlMalloc(ENTITY_HASH_SIZE *
+ sizeof(xmlEntityPtr));
+ if (ret->table == NULL) {
+ fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n");
+ xmlFree(ret);
+ return(NULL);
+ }
+#else
ret->table = (xmlEntityPtr *) xmlMalloc(table->max_entities *
sizeof(xmlEntityPtr));
if (ret->table == NULL) {
@@ -1087,32 +1238,23 @@ xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
xmlFree(ret);
return(NULL);
}
+#endif
ret->max_entities = table->max_entities;
ret->nb_entities = table->nb_entities;
for (i = 0;i < ret->nb_entities;i++) {
- cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
- if (cur == NULL) {
- fprintf(stderr, "xmlCopyEntityTable: out of memory !\n");
- xmlFree(ret);
- xmlFree(ret->table);
- return(NULL);
- }
- memset(cur, 0, sizeof(xmlEntity));
- cur->type = XML_ELEMENT_DECL;
- ret->table[i] = cur;
ent = table->table[i];
-
- cur->etype = ent->etype;
- if (ent->name != NULL)
- cur->name = xmlStrdup(ent->name);
- if (ent->ExternalID != NULL)
- cur->ExternalID = xmlStrdup(ent->ExternalID);
- if (ent->SystemID != NULL)
- cur->SystemID = xmlStrdup(ent->SystemID);
- if (ent->content != NULL)
- cur->content = xmlStrdup(ent->content);
- if (ent->orig != NULL)
- cur->orig = xmlStrdup(ent->orig);
+ if (ent == NULL)
+ cur = NULL;
+ else
+ cur = xmlCopyEntity(ent);
+ ret->table[i] = cur;
+#ifdef ENTITY_HASH_SIZE
+ ent = ent->nexte;
+ while ((ent != NULL) && (cur != NULL)) {
+ cur->nexte = xmlCopyEntity(ent);
+ cur = cur->nexte;
+ }
+#endif
}
return(ret);
}
@@ -1217,8 +1359,18 @@ xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
if (table == NULL) return;
+#ifdef ENTITY_HASH_SIZE
+ for (i = 0;i < ENTITY_HASH_SIZE;i++) {
+ cur = table->table[i];
+ while (cur != NULL) {
+ xmlDumpEntityDecl(buf, cur);
+ cur = cur->nexte;
+ }
+ }
+#else
for (i = 0;i < table->nb_entities;i++) {
cur = table->table[i];
xmlDumpEntityDecl(buf, cur);
}
+#endif
}
diff --git a/entities.h b/entities.h
index c9bd0035..e3930425 100644
--- a/entities.h
+++ b/entities.h
@@ -54,6 +54,8 @@ struct _xmlEntity {
const xmlChar *ExternalID; /* External identifier for PUBLIC */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
+ struct _xmlEntity *nexte; /* next entity in the hash table */
+
#ifdef WITH_EXTRA_ENT_DETECT
/* Referenced entities name stack */
xmlChar *ent; /* Current parsed Node */
diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h
index 5b6af9fa..62e81e3d 100644
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@@ -133,15 +133,39 @@ struct _xmlCharEncodingHandler {
#endif /* LIBXML_ICONV_ENABLED */
};
+/*
+ * Interfaces for encoding handlers
+ */
void xmlInitCharEncodingHandlers (void);
void xmlCleanupCharEncodingHandlers (void);
void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);
-xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in,
+xmlCharEncodingHandlerPtr
+ xmlGetCharEncodingHandler (xmlCharEncoding enc);
+xmlCharEncodingHandlerPtr
+ xmlFindCharEncodingHandler (const char *name);
+
+
+/*
+ * Interfaces for encoding names and aliases
+ */
+int xmlAddEncodingAlias (const char *name,
+ const char *alias);
+int xmlDelEncodingAlias (const char *alias);
+const char *
+ xmlGetEncodingAlias (const char *alias);
+void xmlCleanupEncodingAliases (void);
+xmlCharEncoding
+ xmlParseCharEncoding (const char* name);
+const char*
+ xmlGetCharEncodingName (xmlCharEncoding enc);
+
+/*
+ * Interfaces directly used by the parsers.
+ */
+xmlCharEncoding
+ xmlDetectCharEncoding (const unsigned char* in,
int len);
-xmlCharEncoding xmlParseCharEncoding (const char* name);
-const char* xmlGetCharEncodingName (xmlCharEncoding enc);
-xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
-xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
+
int xmlCheckUTF8 (const unsigned char *utf);
int xmlCharEncOutFunc (xmlCharEncodingHandler *handler,
diff --git a/include/libxml/entities.h b/include/libxml/entities.h
index c9bd0035..e3930425 100644
--- a/include/libxml/entities.h
+++ b/include/libxml/entities.h
@@ -54,6 +54,8 @@ struct _xmlEntity {
const xmlChar *ExternalID; /* External identifier for PUBLIC */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
+ struct _xmlEntity *nexte; /* next entity in the hash table */
+
#ifdef WITH_EXTRA_ENT_DETECT
/* Referenced entities name stack */
xmlChar *ent; /* Current parsed Node */
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index 6d257c56..5c49eb4d 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -182,6 +182,7 @@ struct _xmlParserCtxt {
actually an xmlCharEncoding */
int nodelen; /* Those two fields are there to */
int nodemem; /* Speed up large node parsing */
+ int pedantic; /* signal pedantic warnings */
};
/**
@@ -354,6 +355,7 @@ xmlDocPtr xmlParseFile (const char *filename);
int xmlSubstituteEntitiesDefault(int val);
int xmlKeepBlanksDefault (int val);
void xmlStopParser (xmlParserCtxtPtr ctxt);
+int xmlPedanticParserDefault(int val);
/**
* Recovery mode
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index f0f7561c..8fd6ffc6 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -432,6 +432,12 @@ extern "C" {
while ((*p) && (*(p) != '<')) (p)++
/**
+ * Not for the faint of heart
+ */
+
+extern int xmlParserDebugEntities;
+
+/**
* Parser context
*/
xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
diff --git a/include/libxml/valid.h b/include/libxml/valid.h
index 6849b97b..ff7a9af1 100644
--- a/include/libxml/valid.h
+++ b/include/libxml/valid.h
@@ -67,6 +67,7 @@ struct _xmlElementTable {
int nb_elements; /* number of elements stored */
int max_elements; /* maximum number of elements */
xmlElementPtr *table; /* the table of elements */
+ int last; /* last element accessed */
};
/*
diff --git a/parser.c b/parser.c
index ae1f5e70..253d9d48 100644
--- a/parser.c
+++ b/parser.c
@@ -48,6 +48,8 @@
#define XML_PARSER_BUFFER_SIZE 100
int xmlGetWarningsDefaultValue = 1;
+int xmlParserDebugEntities = 0;
+
/*
* List of XML prefixed PI allowed by W3C specs
@@ -583,6 +585,7 @@ xmlParserInputShrink(xmlParserInputPtr in) {
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
+int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
const xmlChar ** str);
@@ -1209,6 +1212,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt) {
if (ctxt->inputNr == 1) return(0); /* End of main Input */
+ if (xmlParserDebugEntities)
+ fprintf(stderr, "Popping input %d\n", ctxt->inputNr);
xmlFreeInputStream(inputPop(ctxt));
if ((*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
@@ -1227,6 +1232,13 @@ xmlPopInput(xmlParserCtxtPtr ctxt) {
void
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
if (input == NULL) return;
+
+ if (xmlParserDebugEntities) {
+ if ((ctxt->input != NULL) && (ctxt->input->filename))
+ fprintf(stderr, "%s(%d): ", ctxt->input->filename,
+ ctxt->input->line);
+ fprintf(stderr, "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
+ }
inputPush(ctxt, input);
GROW;
}
@@ -1298,6 +1310,8 @@ xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
xmlCharEncoding enc) {
xmlParserInputPtr inputStream;
+ if (xmlParserDebugEntities)
+ fprintf(stderr, "new input from I/O\n");
inputStream = xmlNewInputStream(ctxt);
if (inputStream == NULL) {
return(NULL);
@@ -1334,6 +1348,8 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
ctxt->errNo = XML_ERR_INTERNAL_ERROR;
return(NULL);
}
+ if (xmlParserDebugEntities)
+ fprintf(stderr, "new input from entity: %s\n", entity->name);
if (entity->content == NULL) {
switch (entity->etype) {
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
@@ -1396,6 +1412,8 @@ xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
"internal: xmlNewStringInputStream string = NULL\n");
return(NULL);
}
+ if (xmlParserDebugEntities)
+ fprintf(stderr, "new fixed input: %.30s\n", buffer);
input = xmlNewInputStream(ctxt);
if (input == NULL) {
return(NULL);
@@ -1421,6 +1439,8 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
xmlParserInputPtr inputStream;
char *directory = NULL;
+ if (xmlParserDebugEntities)
+ fprintf(stderr, "new input from file: %s\n", filename);
if (ctxt == NULL) return(NULL);
buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
if (buf == NULL) {
@@ -1544,6 +1564,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
ctxt->wellFormed = 1;
ctxt->valid = 1;
ctxt->validate = xmlDoValidityCheckingDefaultValue;
+ ctxt->pedantic = xmlPedanticParserDefaultValue;
ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
ctxt->vctxt.userData = ctxt;
if (ctxt->validate) {
@@ -2210,6 +2231,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
NEXT;
name = xmlParseName(ctxt);
+ if (xmlParserDebugEntities)
+ fprintf(stderr, "PE Reference: %s\n", name);
if (name == NULL) {
ctxt->errNo = XML_ERR_PEREF_NO_NAME;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
@@ -2247,11 +2270,12 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
* ... The declaration of a parameter entity must precede
* any reference to it...
*/
- if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
+ if ((!ctxt->disableSAX) &&
+ (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
ctxt->vctxt.error(ctxt->vctxt.userData,
"PEReference: %%%s; not found\n", name);
- } else
- if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
+ } else if ((!ctxt->disableSAX) &&
+ (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
ctxt->sax->warning(ctxt->userData,
"PEReference: %%%s; not found\n", name);
ctxt->valid = 0;
@@ -2369,6 +2393,8 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
NEXTL(l);
} else if ((c == '&') && (ctxt->token != '&') &&
(what & XML_SUBSTITUTE_REF)) {
+ if (xmlParserDebugEntities)
+ fprintf(stderr, "decoding Entity Reference\n");
ent = xmlParseEntityRef(ctxt);
if ((ent != NULL) &&
(ctxt->replaceEntities != 0)) {
@@ -2397,6 +2423,8 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
* we break here to flush the current set of chars
* parsed if any. We will be called back later.
*/
+ if (xmlParserDebugEntities)
+ fprintf(stderr, "decoding PE Reference\n");
if (nbchars != 0) break;
xmlParsePEReference(ctxt);
@@ -2484,6 +2512,9 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
COPY_BUF(0,buffer,nbchars,val);
}
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
+ if (xmlParserDebugEntities)
+ fprintf(stderr, "String decoding Entity Reference: %.30s\n",
+ str);
ent = xmlParseStringEntityRef(ctxt, &str);
if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
if (ent->content != NULL) {
@@ -2524,6 +2555,8 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
buffer[nbchars++] = ';';
}
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
+ if (xmlParserDebugEntities)
+ fprintf(stderr, "String decoding PE Reference: %.30s\n", str);
ent = xmlParseStringPEReference(ctxt, &str);
if (ent != NULL) {
xmlChar *rep;
@@ -4003,6 +4036,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
while ((RAW == 0) && (ctxt->inputNr > 1))
xmlPopInput(ctxt);
+ GROW;
c = CUR_CHAR(l);
if (c == 0) {
GROW;
@@ -5642,12 +5676,14 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
return;
}
SKIP_BLANKS;
+ GROW;
while (RAW != '>') {
const xmlChar *check = CUR_PTR;
int type;
int def;
xmlChar *defaultValue = NULL;
+ GROW;
tree = NULL;
attrName = xmlParseName(ctxt);
if (attrName == NULL) {
@@ -6462,8 +6498,6 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
* xmlParseConditionalSections
* @ctxt: an XML parser context
*
- * TODO : Conditionnal section are not yet supported !
- *
* [61] conditionalSect ::= includeSect | ignoreSect
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
@@ -6490,6 +6524,13 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
} else {
NEXT;
}
+ if (xmlParserDebugEntities) {
+ if ((ctxt->input != NULL) && (ctxt->input->filename))
+ fprintf(stderr, "%s(%d): ", ctxt->input->filename,
+ ctxt->input->line);
+ fprintf(stderr, "Entering INCLUDE Conditional Section\n");
+ }
+
while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
(NXT(2) != '>'))) {
const xmlChar *check = CUR_PTR;
@@ -6522,6 +6563,13 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
break;
}
}
+ if (xmlParserDebugEntities) {
+ if ((ctxt->input != NULL) && (ctxt->input->filename))
+ fprintf(stderr, "%s(%d): ", ctxt->input->filename,
+ ctxt->input->line);
+ fprintf(stderr, "Leaving INCLUDE Conditional Section\n");
+ }
+
} else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
(NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
int state;
@@ -6538,12 +6586,19 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
} else {
NEXT;
}
+ if (xmlParserDebugEntities) {
+ if ((ctxt->input != NULL) && (ctxt->input->filename))
+ fprintf(stderr, "%s(%d): ", ctxt->input->filename,
+ ctxt->input->line);
+ fprintf(stderr, "Entering IGNORE Conditional Section\n");
+ }
/*
* Parse up to the end of the conditionnal section
* But disable SAX event generating DTD building in the meantime
*/
state = ctxt->disableSAX;
+ ctxt->disableSAX = 1;
while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
(NXT(2) != '>'))) {
const xmlChar *check = CUR_PTR;
@@ -6577,6 +6632,13 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
}
}
ctxt->disableSAX = state;
+ if (xmlParserDebugEntities) {
+ if ((ctxt->input != NULL) && (ctxt->input->filename))
+ fprintf(stderr, "%s(%d): ", ctxt->input->filename,
+ ctxt->input->line);
+ fprintf(stderr, "Leaving IGNORE Conditional Section\n");
+ }
+
} else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
@@ -6644,6 +6706,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
int cons = ctxt->input->consumed;
int tok = ctxt->token;
+ GROW;
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
xmlParseConditionalSections(ctxt);
} else if (IS_BLANK(CUR)) {
@@ -7349,7 +7412,8 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) {
if ((ctxt->standalone == 1) ||
((ctxt->hasExternalSubset == 0) &&
(ctxt->hasPErefs == 0))) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ if ((!ctxt->disableSAX) &&
+ (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
"PEReference: %%%s; not found\n", name);
ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
@@ -7363,7 +7427,8 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) {
* ... The declaration of a parameter entity must precede
* any reference to it...
*/
- if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
+ if ((!ctxt->disableSAX) &&
+ (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
ctxt->sax->warning(ctxt->userData,
"PEReference: %%%s; not found\n", name);
ctxt->valid = 0;
@@ -11472,6 +11537,23 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
/**
+ * xmlPedanticParserDefault:
+ * @val: int 0 or 1
+ *
+ * Set the global default for pedantic warnings and return the old default.
+ *
+ * Returns the previous value: 0 for pedantic warnings off, 1 for on.
+ */
+
+int
+xmlPedanticParserDefault(int val) {
+ int old = xmlPedanticParserDefaultValue;
+
+ xmlPedanticParserDefaultValue = val;
+ return(old);
+}
+
+/**
* xmlSubstituteEntitiesDefault:
* @val: int 0 or 1
*
diff --git a/parser.h b/parser.h
index 6d257c56..5c49eb4d 100644
--- a/parser.h
+++ b/parser.h
@@ -182,6 +182,7 @@ struct _xmlParserCtxt {
actually an xmlCharEncoding */
int nodelen; /* Those two fields are there to */
int nodemem; /* Speed up large node parsing */
+ int pedantic; /* signal pedantic warnings */
};
/**
@@ -354,6 +355,7 @@ xmlDocPtr xmlParseFile (const char *filename);
int xmlSubstituteEntitiesDefault(int val);
int xmlKeepBlanksDefault (int val);
void xmlStopParser (xmlParserCtxtPtr ctxt);
+int xmlPedanticParserDefault(int val);
/**
* Recovery mode
diff --git a/parserInternals.h b/parserInternals.h
index f0f7561c..8fd6ffc6 100644
--- a/parserInternals.h
+++ b/parserInternals.h
@@ -432,6 +432,12 @@ extern "C" {
while ((*p) && (*(p) != '<')) (p)++
/**
+ * Not for the faint of heart
+ */
+
+extern int xmlParserDebugEntities;
+
+/**
* Parser context
*/
xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
diff --git a/testHTML.c b/testHTML.c
index d680cf03..f998072b 100644
--- a/testHTML.c
+++ b/testHTML.c
@@ -572,7 +572,8 @@ xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
************************************************************************/
void parseSAXFile(char *filename) {
- htmlDocPtr doc;
+ htmlDocPtr doc = NULL;
+
/*
* Empty callbacks for checking
*/
diff --git a/uri.c b/uri.c
index 28810c7c..4f4452a6 100644
--- a/uri.c
+++ b/uri.c
@@ -1024,14 +1024,15 @@ xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) {
*str = cur;
return(-1);
}
- path[len] = '\0';
if (uri->path != NULL)
memcpy(path, uri->path, len2);
if (slash) {
path[len2] = '/';
len2++;
}
- xmlURIUnescapeString(*str, cur - *str, &path[len2]);
+ path[len2] = 0;
+ if (cur - *str > 0)
+ xmlURIUnescapeString(*str, cur - *str, &path[len2]);
if (uri->path != NULL)
xmlFree(uri->path);
uri->path = path;
diff --git a/valid.c b/valid.c
index 41c04d1c..7aa76abe 100644
--- a/valid.c
+++ b/valid.c
@@ -463,6 +463,7 @@ xmlCreateElementTable(void) {
}
ret->max_elements = XML_MIN_ELEMENT_TABLE;
ret->nb_elements = 0;
+ ret->last = 0;
ret->table = (xmlElementPtr *)
xmlMalloc(ret->max_elements * sizeof(xmlElementPtr));
if (ret->table == NULL) {
@@ -978,6 +979,7 @@ xmlScanIDAttributeDecl(xmlValidCtxtPtr ctxt, xmlElementPtr elem) {
* @tree: if it's an enumeration, the associated list
*
* Register a new attribute declaration
+ * Note that the DTD takes ownership of @tree
*
* Returns NULL if not new, othervise the attribute decl
*/
@@ -993,14 +995,17 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *elem,
if (dtd == NULL) {
fprintf(stderr, "xmlAddAttributeDecl: dtd == NULL\n");
+ xmlFreeEnumeration(tree);
return(NULL);
}
if (name == NULL) {
fprintf(stderr, "xmlAddAttributeDecl: name == NULL\n");
+ xmlFreeEnumeration(tree);
return(NULL);
}
if (elem == NULL) {
fprintf(stderr, "xmlAddAttributeDecl: elem == NULL\n");
+ xmlFreeEnumeration(tree);
return(NULL);
}
/*
@@ -1029,6 +1034,7 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *elem,
break;
default:
fprintf(stderr, "xmlAddAttributeDecl: unknown type %d\n", type);
+ xmlFreeEnumeration(tree);
return(NULL);
}
if ((defaultValue != NULL) &&
@@ -1054,19 +1060,59 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *elem,
/*
* Validity Check:
* Search the DTD for previous declarations of the ATTLIST
+ * The initial code used to walk the attribute table comparing
+ * all pairs of element/attribute names, and was far too slow
+ * for large DTDs. We now walk the attribute list associated with
+ * the element declaration instead, if this declaration is found.
*/
- for (i = 0;i < table->nb_attributes;i++) {
- cur = table->table[i];
- if ((ns != NULL) && (cur->prefix == NULL)) continue;
- if ((ns == NULL) && (cur->prefix != NULL)) continue;
- if ((!xmlStrcmp(cur->name, name)) && (!xmlStrcmp(cur->elem, elem)) &&
- ((ns == NULL) || (!xmlStrcmp(cur->prefix, ns)))) {
- /*
- * The attribute is already defined in this Dtd.
- */
- VWARNING(ctxt->userData, "Attribute %s on %s: already defined\n",
- elem, name);
- return(NULL);
+ elemDef = xmlGetDtdElementDesc(dtd, elem);
+ if (elemDef != NULL) {
+ /*
+ * follow the attribute list.
+ */
+ cur = elemDef->attributes;
+ while (cur != NULL) {
+ if ((ns != NULL) && (cur->prefix == NULL)) {
+ cur = cur->nexth;
+ continue;
+ }
+ if ((ns == NULL) && (cur->prefix != NULL)) {
+ cur = cur->nexth;
+ continue;
+ }
+ if ((!xmlStrcmp(cur->name, name)) &&
+ ((ns == NULL) || (!xmlStrcmp(cur->prefix, ns)))) {
+ /*
+ * The attribute is already defined in this Dtd.
+ */
+ VWARNING(ctxt->userData,
+ "Attribute %s on %s: already defined\n",
+ name, elem);
+ xmlFreeEnumeration(tree);
+ return(NULL);
+ }
+ cur = cur->nexth;
+ }
+ } else {
+ /*
+ * Walk down the attribute table.
+ */
+ for (i = 0;i < table->nb_attributes;i++) {
+ cur = table->table[i];
+ if ((ns != NULL) && (cur->prefix == NULL)) continue;
+ if ((ns == NULL) && (cur->prefix != NULL)) continue;
+ if ((!xmlStrcmp(cur->name, name)) &&
+ (!xmlStrcmp(cur->elem, elem)) &&
+ ((ns == NULL) || (!xmlStrcmp(cur->prefix, ns)))) {
+ /*
+ * The attribute is already defined in this Dtd.
+ */
+ VWARNING(ctxt->userData,
+ "Attribute %s on %s: already defined\n",
+ elem, name);
+ xmlFreeEnumeration(tree);
+ return(NULL);
+ }
}
}
@@ -1106,7 +1152,6 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *elem,
ret->tree = tree;
if (defaultValue != NULL)
ret->defaultValue = xmlStrdup(defaultValue);
- elemDef = xmlGetDtdElementDesc(dtd, elem);
if (elemDef != NULL) {
if ((type == XML_ATTRIBUTE_ID) &&
(xmlScanIDAttributeDecl(NULL, elemDef) != 0))
@@ -2166,10 +2211,17 @@ xmlGetDtdElementDesc(xmlDtdPtr dtd, const xmlChar *name) {
if (dtd->elements == NULL) return(NULL);
table = (xmlElementTablePtr) dtd->elements;
+ if ((table->last >= 0) && (table->last < table->nb_elements)) {
+ cur = table->table[table->last];
+ if (!xmlStrcmp(cur->name, name))
+ return(cur);
+ }
for (i = 0;i < table->nb_elements;i++) {
cur = table->table[i];
- if (!xmlStrcmp(cur->name, name))
+ if (!xmlStrcmp(cur->name, name)) {
+ table->last = i;
return(cur);
+ }
}
/*
diff --git a/valid.h b/valid.h
index 6849b97b..ff7a9af1 100644
--- a/valid.h
+++ b/valid.h
@@ -67,6 +67,7 @@ struct _xmlElementTable {
int nb_elements; /* number of elements stored */
int max_elements; /* maximum number of elements */
xmlElementPtr *table; /* the table of elements */
+ int last; /* last element accessed */
};
/*
diff --git a/xmllint.c b/xmllint.c
index 6027d384..f9d96849 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -564,7 +564,7 @@ void parseAndPrintFile(char *filename) {
#ifdef LIBXML_DEBUG_ENABLED
if ((debugent) && (!html))
- xmlDebugDumpEntities(stdout, doc);
+ xmlDebugDumpEntities(stderr, doc);
#endif
/*
@@ -582,8 +582,6 @@ int main(int argc, char **argv) {
#ifdef LIBXML_DEBUG_ENABLED
if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
debug++;
- else if ((!strcmp(argv[i], "-debugent")) || (!strcmp(argv[i], "--debugent")))
- debugent++;
else if ((!strcmp(argv[i], "-shell")) ||
(!strcmp(argv[i], "--shell"))) {
shell++;
@@ -644,11 +642,26 @@ int main(int argc, char **argv) {
else if ((!strcmp(argv[i], "-nowarning")) ||
(!strcmp(argv[i], "--nowarning"))) {
xmlGetWarningsDefaultValue = 0;
+ xmlPedanticParserDefault(0);
+ }
+ else if ((!strcmp(argv[i], "-pedantic")) ||
+ (!strcmp(argv[i], "--pedantic"))) {
+ xmlGetWarningsDefaultValue = 1;
+ xmlPedanticParserDefault(1);
}
+ else if ((!strcmp(argv[i], "-debugent")) ||
+ (!strcmp(argv[i], "--debugent"))) {
+ debugent++;
+ xmlParserDebugEntities = 1;
+ }
else if ((!strcmp(argv[i], "-encode")) ||
(!strcmp(argv[i], "--encode"))) {
i++;
encoding = argv[i];
+ /*
+ * OK it's for testing purposes
+ */
+ xmlAddEncodingAlias("UTF-8", "DVEnc");
}
else if ((!strcmp(argv[i], "-noblanks")) ||
(!strcmp(argv[i], "--noblanks"))) {