diff options
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | SAX.c | 19 | ||||
-rw-r--r-- | TODO | 5 | ||||
-rw-r--r-- | debugXML.c | 126 | ||||
-rw-r--r-- | encoding.c | 197 | ||||
-rw-r--r-- | encoding.h | 34 | ||||
-rw-r--r-- | entities.c | 304 | ||||
-rw-r--r-- | entities.h | 2 | ||||
-rw-r--r-- | include/libxml/encoding.h | 34 | ||||
-rw-r--r-- | include/libxml/entities.h | 2 | ||||
-rw-r--r-- | include/libxml/parser.h | 2 | ||||
-rw-r--r-- | include/libxml/parserInternals.h | 6 | ||||
-rw-r--r-- | include/libxml/valid.h | 1 | ||||
-rw-r--r-- | parser.c | 96 | ||||
-rw-r--r-- | parser.h | 2 | ||||
-rw-r--r-- | parserInternals.h | 6 | ||||
-rw-r--r-- | testHTML.c | 3 | ||||
-rw-r--r-- | uri.c | 5 | ||||
-rw-r--r-- | valid.c | 80 | ||||
-rw-r--r-- | valid.h | 1 | ||||
-rw-r--r-- | xmllint.c | 19 |
21 files changed, 778 insertions, 180 deletions
@@ -1,3 +1,17 @@ +Sat Aug 26 23:31:04 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org> + + * doc/encoding.html: added encoding aliases doc + * doc/xml.html: updates + * encoding.[ch]: added EncodingAliases functions + * entities.[ch] valid.[ch] debugXML.c: removed two serious + bottleneck affecting large DTDs like Docbook + * parser.[ch] xmllint.c: added a pedantic option, will be + useful + * SAX.c: redefinition of entities is reported in pedantic mode + * testHTML.c: uninitialized warning from gcc + * uri.c: fixed a couple of bugs + * TODO: added issue raised by Michael + Wed Aug 23 01:50:51 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org> * doc/encoding.html: propagated Martin Duerst suggestions @@ -395,19 +395,28 @@ void entityDecl(void *ctx, const xmlChar *name, int type, const xmlChar *publicId, const xmlChar *systemId, xmlChar *content) { + xmlEntityPtr ent; xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; #ifdef DEBUG_SAX fprintf(stderr, "SAX.entityDecl(%s, %d, %s, %s, %s)\n", name, type, publicId, systemId, content); #endif - if (ctxt->inSubset == 1) - xmlAddDocEntity(ctxt->myDoc, name, type, publicId, + if (ctxt->inSubset == 1) { + ent = xmlAddDocEntity(ctxt->myDoc, name, type, publicId, systemId, content); - else if (ctxt->inSubset == 2) - xmlAddDtdEntity(ctxt->myDoc, name, type, publicId, + if ((ent == NULL) && (ctxt->pedantic) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "Entity(%s) already defined in the internal subset\n", name); + } else if (ctxt->inSubset == 2) { + ent = xmlAddDtdEntity(ctxt->myDoc, name, type, publicId, systemId, content); - else { + if ((ent == NULL) && (ctxt->pedantic) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt, + "Entity(%s) already defined in the external subset\n", name); + } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt, "SAX.entityDecl(%s) called while not in subset\n", name); @@ -25,6 +25,11 @@ TODO: 
issue a warning - fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA not WITHOUT_CORBA flag +- reported by Michael, update of doc node when pasting on a new document + There can be far more than simply the doc pointer which refers to + the old document, for example namespace declarations or entities + references can also be a nasty problem, far more than updating the + doc values. TODO: ===== @@ -760,38 +760,41 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) { xmlEntitiesTablePtr table = (xmlEntitiesTablePtr) doc->intSubset->entities; fprintf(output, "Entities in internal subset\n"); - for (i = 0;i < table->nb_entities;i++) { + for (i = 0;i < table->max_entities;i++) { cur = table->table[i]; - fprintf(output, "%d : %s : ", i, cur->name); - switch (cur->etype) { - case XML_INTERNAL_GENERAL_ENTITY: - fprintf(output, "INTERNAL GENERAL, "); - break; - case XML_EXTERNAL_GENERAL_PARSED_ENTITY: - fprintf(output, "EXTERNAL PARSED, "); - break; - case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: - fprintf(output, "EXTERNAL UNPARSED, "); - break; - case XML_INTERNAL_PARAMETER_ENTITY: - fprintf(output, "INTERNAL PARAMETER, "); - break; - case XML_EXTERNAL_PARAMETER_ENTITY: - fprintf(output, "EXTERNAL PARAMETER, "); - break; - default: - fprintf(output, "UNKNOWN TYPE %d", - cur->etype); + while (cur != NULL) { + fprintf(output, "%d : %s : ", i, cur->name); + switch (cur->etype) { + case XML_INTERNAL_GENERAL_ENTITY: + fprintf(output, "INTERNAL GENERAL, "); + break; + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + fprintf(output, "EXTERNAL PARSED, "); + break; + case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + fprintf(output, "EXTERNAL UNPARSED, "); + break; + case XML_INTERNAL_PARAMETER_ENTITY: + fprintf(output, "INTERNAL PARAMETER, "); + break; + case XML_EXTERNAL_PARAMETER_ENTITY: + fprintf(output, "EXTERNAL PARAMETER, "); + break; + default: + fprintf(output, "UNKNOWN TYPE %d", + cur->etype); + } + if (cur->ExternalID != NULL) + fprintf(output, "ID \"%s\"", 
cur->ExternalID); + if (cur->SystemID != NULL) + fprintf(output, "SYSTEM \"%s\"", cur->SystemID); + if (cur->orig != NULL) + fprintf(output, "\n orig \"%s\"", cur->orig); + if (cur->content != NULL) + fprintf(output, "\n content \"%s\"", cur->content); + fprintf(output, "\n"); + cur = cur->nexte; } - if (cur->ExternalID != NULL) - fprintf(output, "ID \"%s\"", cur->ExternalID); - if (cur->SystemID != NULL) - fprintf(output, "SYSTEM \"%s\"", cur->SystemID); - if (cur->orig != NULL) - fprintf(output, "\n orig \"%s\"", cur->orig); - if (cur->content != NULL) - fprintf(output, "\n content \"%s\"", cur->content); - fprintf(output, "\n"); } } else fprintf(output, "No entities in internal subset\n"); @@ -799,38 +802,41 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) { xmlEntitiesTablePtr table = (xmlEntitiesTablePtr) doc->extSubset->entities; fprintf(output, "Entities in external subset\n"); - for (i = 0;i < table->nb_entities;i++) { + for (i = 0;i < table->max_entities;i++) { cur = table->table[i]; - fprintf(output, "%d : %s : ", i, cur->name); - switch (cur->etype) { - case XML_INTERNAL_GENERAL_ENTITY: - fprintf(output, "INTERNAL GENERAL, "); - break; - case XML_EXTERNAL_GENERAL_PARSED_ENTITY: - fprintf(output, "EXTERNAL PARSED, "); - break; - case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: - fprintf(output, "EXTERNAL UNPARSED, "); - break; - case XML_INTERNAL_PARAMETER_ENTITY: - fprintf(output, "INTERNAL PARAMETER, "); - break; - case XML_EXTERNAL_PARAMETER_ENTITY: - fprintf(output, "EXTERNAL PARAMETER, "); - break; - default: - fprintf(output, "UNKNOWN TYPE %d", - cur->etype); + while (cur != NULL) { + fprintf(output, "%d : %s : ", i, cur->name); + switch (cur->etype) { + case XML_INTERNAL_GENERAL_ENTITY: + fprintf(output, "INTERNAL GENERAL, "); + break; + case XML_EXTERNAL_GENERAL_PARSED_ENTITY: + fprintf(output, "EXTERNAL PARSED, "); + break; + case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + fprintf(output, "EXTERNAL UNPARSED, "); + break; + case 
XML_INTERNAL_PARAMETER_ENTITY: + fprintf(output, "INTERNAL PARAMETER, "); + break; + case XML_EXTERNAL_PARAMETER_ENTITY: + fprintf(output, "EXTERNAL PARAMETER, "); + break; + default: + fprintf(output, "UNKNOWN TYPE %d", + cur->etype); + } + if (cur->ExternalID != NULL) + fprintf(output, "ID \"%s\"", cur->ExternalID); + if (cur->SystemID != NULL) + fprintf(output, "SYSTEM \"%s\"", cur->SystemID); + if (cur->orig != NULL) + fprintf(output, "\n orig \"%s\"", cur->orig); + if (cur->content != NULL) + fprintf(output, "\n content \"%s\"", cur->content); + fprintf(output, "\n"); + cur = cur->nexte; } - if (cur->ExternalID != NULL) - fprintf(output, "ID \"%s\"", cur->ExternalID); - if (cur->SystemID != NULL) - fprintf(output, "SYSTEM \"%s\"", cur->SystemID); - if (cur->orig != NULL) - fprintf(output, "\n orig \"%s\"", cur->orig); - if (cur->content != NULL) - fprintf(output, "\n content \"%s\"", cur->content); - fprintf(output, "\n"); } } else fprintf(output, "No entities in external subset\n"); @@ -50,6 +50,17 @@ xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; +typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; +typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; +struct _xmlCharEncodingAlias { + const char *name; + const char *alias; +}; + +static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; +static int xmlCharEncodingAliasesNb = 0; +static int xmlCharEncodingAliasesMax = 0; + #ifdef LIBXML_ICONV_ENABLED #if 0 #define DEBUG_ENCODING /* Define this to get encoding traces */ @@ -906,6 +917,157 @@ xmlDetectCharEncoding(const unsigned char* in, int len) } /** + * xmlCleanupEncodingAliases: + * + * Unregisters all aliases + */ +void +xmlCleanupEncodingAliases(void) { + int i; + + if (xmlCharEncodingAliases == NULL) + return; + + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (xmlCharEncodingAliases[i].name != NULL) + xmlFree((char *) xmlCharEncodingAliases[i].name); + if 
(xmlCharEncodingAliases[i].alias != NULL) + xmlFree((char *) xmlCharEncodingAliases[i].alias); + } + xmlCharEncodingAliasesNb = 0; + xmlCharEncodingAliasesMax = 0; + xmlFree(xmlCharEncodingAliases); +} + +/** + * xmlGetEncodingAlias: + * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) + * + * Lookup an encoding name for the given alias. + * + * Returns NULL if not found the original name otherwise + */ +const char * +xmlGetEncodingAlias(const char *alias) { + int i; + char upper[100]; + + if (alias == NULL) + return(NULL); + + if (xmlCharEncodingAliases == NULL) + return(NULL); + + for (i = 0;i < 99;i++) { + upper[i] = toupper(alias[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + + /* + * Walk down the list looking for a definition of the alias + */ + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { + return(xmlCharEncodingAliases[i].name); + } + } + return(NULL); +} + +/** + * xmlAddEncodingAlias: + * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) + * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) + * + * Registers and alias @alias for an encoding named @name. Existing alias + * will be overwritten. 
+ * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlAddEncodingAlias(const char *name, const char *alias) { + int i; + char upper[100]; + + if ((name == NULL) || (alias == NULL)) + return(-1); + + for (i = 0;i < 99;i++) { + upper[i] = toupper(alias[i]); + if (upper[i] == 0) break; + } + upper[i] = 0; + + if (xmlCharEncodingAliases == NULL) { + xmlCharEncodingAliasesNb = 0; + xmlCharEncodingAliasesMax = 20; + xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) + xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); + if (xmlCharEncodingAliases == NULL) + return(-1); + } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { + xmlCharEncodingAliasesMax *= 2; + xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) + xmlRealloc(xmlCharEncodingAliases, + xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); + } + /* + * Walk down the list looking for a definition of the alias + */ + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { + /* + * Replace the definition. 
+ */ + xmlFree((char *) xmlCharEncodingAliases[i].name); + xmlCharEncodingAliases[i].name = xmlMemStrdup(name); + return(0); + } + } + /* + * Add the definition + */ + xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); + xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); + xmlCharEncodingAliasesNb++; + return(0); +} + +/** + * xmlDelEncodingAlias: + * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) + * + * Unregisters an encoding alias @alias + * + * Returns 0 in case of success, -1 in case of error + */ +int +xmlDelEncodingAlias(const char *alias) { + int i; + + if (alias == NULL) + return(-1); + + if (xmlCharEncodingAliases == NULL) + return(-1); + /* + * Walk down the list looking for a definition of the alias + */ + for (i = 0;i < xmlCharEncodingAliasesNb;i++) { + if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { + xmlFree((char *) xmlCharEncodingAliases[i].name); + xmlFree((char *) xmlCharEncodingAliases[i].alias); + xmlCharEncodingAliasesNb--; + memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], + sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); + return(0); + } + } + return(-1); +} + +/** * xmlParseCharEncoding: * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) * @@ -919,9 +1081,20 @@ xmlDetectCharEncoding(const unsigned char* in, int len) xmlCharEncoding xmlParseCharEncoding(const char* name) { + const char *alias; char upper[500]; int i; + if (name == NULL) + return(XML_CHAR_ENCODING_NONE); + + /* + * Do the alias resolution + */ + alias = xmlGetEncodingAlias(name); + if (alias != NULL) + name = alias; + for (i = 0;i < 499;i++) { upper[i] = toupper(name[i]); if (upper[i] == 0) break; @@ -1076,11 +1249,19 @@ xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output) { xmlCharEncodingHandlerPtr handler; + const char *alias; char upper[500]; int i; char *up = 0; /* + * Do the alias 
resolution + */ + alias = xmlGetEncodingAlias(name); + if (alias != NULL) + name = alias; + + /* * Keep only the uppercase version of the encoding. */ if (name == NULL) { @@ -1168,10 +1349,12 @@ xmlInitCharEncodingHandlers(void) { * xmlCleanupCharEncodingHandlers: * * Cleanup the memory allocated for the char encoding support, it - * unregisters all the encoding handlers. + * unregisters all the encoding handlers and the aliases. */ void xmlCleanupCharEncodingHandlers(void) { + xmlCleanupEncodingAliases(); + if (handlers == NULL) return; for (;nbCharEncodingHandler > 0;) { @@ -1350,6 +1533,8 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) { */ xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name) { + const char *nalias; + const char *norig; xmlCharEncoding alias; #ifdef LIBXML_ICONV_ENABLED xmlCharEncodingHandlerPtr enc; @@ -1363,6 +1548,14 @@ xmlFindCharEncodingHandler(const char *name) { if (name[0] == 0) return(xmlDefaultCharEncodingHandler); /* + * Do the alias resolution + */ + norig = name; + nalias = xmlGetEncodingAlias(name); + if (nalias != NULL) + name = nalias; + + /* * Check first for directly registered encoding names */ for (i = 0;i < 99;i++) { @@ -1412,7 +1605,7 @@ xmlFindCharEncodingHandler(const char *name) { /* * Fallback using the canonical names */ - alias = xmlParseCharEncoding(name); + alias = xmlParseCharEncoding(norig); if (alias != XML_CHAR_ENCODING_ERROR) { const char* canon; canon = xmlGetCharEncodingName(alias); @@ -133,15 +133,39 @@ struct _xmlCharEncodingHandler { #endif /* LIBXML_ICONV_ENABLED */ }; +/* + * Interfaces for encoding handlers + */ void xmlInitCharEncodingHandlers (void); void xmlCleanupCharEncodingHandlers (void); void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); -xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in, +xmlCharEncodingHandlerPtr + xmlGetCharEncodingHandler (xmlCharEncoding enc); +xmlCharEncodingHandlerPtr + xmlFindCharEncodingHandler (const char *name); + 
+ +/* + * Interfaces for encoding names and aliases + */ +int xmlAddEncodingAlias (const char *name, + const char *alias); +int xmlDelEncodingAlias (const char *alias); +const char * + xmlGetEncodingAlias (const char *alias); +void xmlCleanupEncodingAliases (void); +xmlCharEncoding + xmlParseCharEncoding (const char* name); +const char* + xmlGetCharEncodingName (xmlCharEncoding enc); + +/* + * Interfaces directly used by the parsers. + */ +xmlCharEncoding + xmlDetectCharEncoding (const unsigned char* in, int len); -xmlCharEncoding xmlParseCharEncoding (const char* name); -const char* xmlGetCharEncodingName (xmlCharEncoding enc); -xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc); -xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name); + int xmlCheckUTF8 (const unsigned char *utf); int xmlCharEncOutFunc (xmlCharEncodingHandler *handler, @@ -22,6 +22,23 @@ #include <libxml/parser.h> #define DEBUG_ENT_REF /* debugging of cross entities dependancies */ +#define ENTITY_HASH_SIZE 256 /* modify xmlEntityComputeHash accordingly */ + +/* + * xmlEntityComputeHash: + * + * Computes the hash value for this given entity + */ +int +xmlEntityComputeHash(const xmlChar *name) { + register const unsigned char *cur = (const unsigned char *) name; + register unsigned char val = 0; + + if (name == NULL) + return(val); + while (*cur) val += *cur++; + return(val); +} /* * The XML predefined entities. @@ -39,6 +56,10 @@ struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = { { "amp", "&" } }; +/* + * TODO: !!!!!!! This is GROSS, allocation of a 256 entry hash for + * a fixed number of 4 elements ! 
+ */ xmlEntitiesTablePtr xmlPredefinedEntities = NULL; /* @@ -77,10 +98,41 @@ void xmlFreeEntity(xmlEntityPtr entity) { */ static xmlEntityPtr xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type, - const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) { + const xmlChar *ExternalID, const xmlChar *SystemID, + const xmlChar *content) { +#ifndef ENTITY_HASH_SIZE int i; +#endif + int hash; xmlEntityPtr ret; + if (name == NULL) + return(NULL); +#ifdef ENTITY_HASH_SIZE + hash = xmlEntityComputeHash(name); + ret = table->table[hash]; + while (ret != NULL) { + if (!xmlStrcmp(ret->name, name)) { + /* + * The entity is already defined in this Dtd, the spec says to NOT + * override it ... Is it worth a Warning ??? !!! + * Not having a cprinting context this seems hard ... + */ + if (((type == XML_INTERNAL_PARAMETER_ENTITY) || + (type == XML_EXTERNAL_PARAMETER_ENTITY)) && + ((ret->etype == XML_INTERNAL_PARAMETER_ENTITY) || + (ret->etype == XML_EXTERNAL_PARAMETER_ENTITY))) + return(NULL); + else + if (((type != XML_INTERNAL_PARAMETER_ENTITY) && + (type != XML_EXTERNAL_PARAMETER_ENTITY)) && + ((ret->etype != XML_INTERNAL_PARAMETER_ENTITY) && + (ret->etype != XML_EXTERNAL_PARAMETER_ENTITY))) + return(NULL); + } + ret = ret->nexte; + } +#else for (i = 0;i < table->nb_entities;i++) { ret = table->table[i]; if (!xmlStrcmp(ret->name, name)) { @@ -115,6 +167,7 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type, return(NULL); } } +#endif ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity)); if (ret == NULL) { fprintf(stderr, "xmlAddEntity: out of memory\n"); @@ -122,7 +175,12 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type, } memset(ret, 0, sizeof(xmlEntity)); ret->type = XML_ENTITY_DECL; +#ifdef ENTITY_HASH_SIZE + ret->nexte = table->table[hash]; + table->table[hash] = ret; +#else table->table[table->nb_entities] = ret; +#endif /* * fill the structure. 
@@ -202,10 +260,20 @@ xmlGetPredefinedEntity(const xmlChar *name) { if (xmlPredefinedEntities == NULL) xmlInitializePredefinedEntities(); +#ifdef ENTITY_HASH_SIZE + i = xmlEntityComputeHash(name); + cur = xmlPredefinedEntities->table[i]; + while (cur != NULL) { + if (!xmlStrcmp(cur->name, name)) + return(cur); + cur = cur->nexte; + } +#else for (i = 0;i < xmlPredefinedEntities->nb_entities;i++) { cur = xmlPredefinedEntities->table[i]; if (!xmlStrcmp(cur->name, name)) return(cur); } +#endif return(NULL); } @@ -455,6 +523,58 @@ xmlEntityAddReference(xmlEntityPtr ent, const xmlChar *to) { } #endif + +/** + * xmlGetEntityFromTable: + * @table: an entity table + * @name: the entity name + * @parameter: look for parameter entities + * + * Do an entity lookup in the table. + * returns the corresponding parameter entity, if found. + * + * Returns A pointer to the entity structure or NULL if not found. + */ +xmlEntityPtr +xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name, + int parameter) { + xmlEntityPtr cur; +#ifdef ENTITY_HASH_SIZE + int hash; + + hash = xmlEntityComputeHash(name); + cur = table->table[hash]; + while (cur != NULL) { + switch (cur->etype) { + case XML_INTERNAL_PARAMETER_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: + if ((parameter) && (!xmlStrcmp(cur->name, name))) + return(cur); + default: + if ((!parameter) && (!xmlStrcmp(cur->name, name))) + return(cur); + } + cur = cur->nexte; + } +#else + int i; + + for (i = 0;i < table->nb_entities;i++) { + cur = table->table[i]; + switch (cur->etype) { + case XML_INTERNAL_PARAMETER_ENTITY: + case XML_EXTERNAL_PARAMETER_ENTITY: + if ((parameter) && (!xmlStrcmp(cur->name, name))) + return(cur); + default: + if ((!parameter) && (!xmlStrcmp(cur->name, name))) + return(cur); + } + } +#endif + return(NULL); +} + /** * xmlGetParameterEntity: * @doc: the document referencing the entity @@ -467,36 +587,18 @@ xmlEntityAddReference(xmlEntityPtr ent, const xmlChar *to) { */ xmlEntityPtr 
xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) { - int i; - xmlEntityPtr cur; xmlEntitiesTablePtr table; + xmlEntityPtr ret; if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) { table = (xmlEntitiesTablePtr) doc->intSubset->entities; - for (i = 0;i < table->nb_entities;i++) { - cur = table->table[i]; - if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) || - (cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) && - (!xmlStrcmp(cur->name, name))) return(cur); - } - } - if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { - table = (xmlEntitiesTablePtr) doc->extSubset->entities; - for (i = 0;i < table->nb_entities;i++) { - cur = table->table[i]; - if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) || - (cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) && - (!xmlStrcmp(cur->name, name))) return(cur); - } + ret = xmlGetEntityFromTable(table, name, 1); + if (ret != NULL) + return(ret); } if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { table = (xmlEntitiesTablePtr) doc->extSubset->entities; - for (i = 0;i < table->nb_entities;i++) { - cur = table->table[i]; - if (((cur->etype == XML_INTERNAL_PARAMETER_ENTITY) || - (cur->etype == XML_EXTERNAL_PARAMETER_ENTITY)) && - (!xmlStrcmp(cur->name, name))) return(cur); - } + return(xmlGetEntityFromTable(table, name, 1)); } return(NULL); } @@ -513,18 +615,11 @@ xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) { */ xmlEntityPtr xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) { - int i; - xmlEntityPtr cur; xmlEntitiesTablePtr table; if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { table = (xmlEntitiesTablePtr) doc->extSubset->entities; - for (i = 0;i < table->nb_entities;i++) { - cur = table->table[i]; - if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) && - (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) && - (!xmlStrcmp(cur->name, name))) return(cur); - } + return(xmlGetEntityFromTable(table, name, 0)); } return(NULL); } @@ -542,39 +637,25 @@ 
xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) { */ xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) { - int i; xmlEntityPtr cur; xmlEntitiesTablePtr table; if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) { table = (xmlEntitiesTablePtr) doc->intSubset->entities; - for (i = 0;i < table->nb_entities;i++) { - cur = table->table[i]; - if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) && - (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) && - (!xmlStrcmp(cur->name, name))) return(cur); - } + cur = xmlGetEntityFromTable(table, name, 0); + if (cur != NULL) + return(cur); } if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { table = (xmlEntitiesTablePtr) doc->extSubset->entities; - for (i = 0;i < table->nb_entities;i++) { - cur = table->table[i]; - if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) && - (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) && - (!xmlStrcmp(cur->name, name))) return(cur); - } + cur = xmlGetEntityFromTable(table, name, 0); + if (cur != NULL) + return(cur); } if (xmlPredefinedEntities == NULL) xmlInitializePredefinedEntities(); table = xmlPredefinedEntities; - for (i = 0;i < table->nb_entities;i++) { - cur = table->table[i]; - if ((cur->etype != XML_INTERNAL_PARAMETER_ENTITY) && - (cur->etype != XML_EXTERNAL_PARAMETER_ENTITY) && - (!xmlStrcmp(cur->name, name))) return(cur); - } - - return(NULL); + return(xmlGetEntityFromTable(table, name, 0)); } /* @@ -1029,8 +1110,9 @@ xmlCreateEntitiesTable(void) { (long)sizeof(xmlEntitiesTable)); return(NULL); } - ret->max_entities = XML_MIN_ENTITIES_TABLE; ret->nb_entities = 0; +#ifdef ENTITY_HASH_SIZE + ret->max_entities = ENTITY_HASH_SIZE; ret->table = (xmlEntityPtr *) xmlMalloc(ret->max_entities * sizeof(xmlEntityPtr)); if (ret == NULL) { @@ -1039,6 +1121,18 @@ xmlCreateEntitiesTable(void) { xmlFree(ret); return(NULL); } + memset(ret->table, 0, ret->max_entities * sizeof(xmlEntityPtr)); +#else + ret->max_entities = XML_MIN_ENTITIES_TABLE; + ret->table = 
(xmlEntityPtr *) + xmlMalloc(ret->max_entities * sizeof(xmlEntityPtr)); + if (ret == NULL) { + fprintf(stderr, "xmlCreateEntitiesTable : xmlMalloc(%ld) failed\n", + ret->max_entities * (long)sizeof(xmlEntityPtr)); + xmlFree(ret); + return(NULL); + } +#endif return(ret); } @@ -1051,17 +1145,65 @@ xmlCreateEntitiesTable(void) { void xmlFreeEntitiesTable(xmlEntitiesTablePtr table) { int i; +#ifdef ENTITY_HASH_SIZE + xmlEntityPtr cur, next; +#endif if (table == NULL) return; +#ifdef ENTITY_HASH_SIZE + for (i = 0;i < ENTITY_HASH_SIZE;i++) { + cur = table->table[i]; + while (cur != NULL) { + next = cur->nexte; + xmlFreeEntity(cur); + cur = next; + } + } +#else for (i = 0;i < table->nb_entities;i++) { xmlFreeEntity(table->table[i]); } +#endif xmlFree(table->table); xmlFree(table); } /** + * xmlCopyEntity: + * @ent: An entity + * + * Build a copy of an entity + * + * Returns the new xmlEntitiesPtr or NULL in case of error. + */ +xmlEntityPtr +xmlCopyEntity(xmlEntityPtr ent) { + xmlEntityPtr cur; + + cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity)); + if (cur == NULL) { + fprintf(stderr, "xmlCopyEntity: out of memory !\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlEntity)); + cur->type = XML_ELEMENT_DECL; + + cur->etype = ent->etype; + if (ent->name != NULL) + cur->name = xmlStrdup(ent->name); + if (ent->ExternalID != NULL) + cur->ExternalID = xmlStrdup(ent->ExternalID); + if (ent->SystemID != NULL) + cur->SystemID = xmlStrdup(ent->SystemID); + if (ent->content != NULL) + cur->content = xmlStrdup(ent->content); + if (ent->orig != NULL) + cur->orig = xmlStrdup(ent->orig); + return(cur); +} + +/** * xmlCopyEntitiesTable: * @table: An entity table * @@ -1080,6 +1222,15 @@ xmlCopyEntitiesTable(xmlEntitiesTablePtr table) { fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n"); return(NULL); } +#ifdef ENTITY_HASH_SIZE + ret->table = (xmlEntityPtr *) xmlMalloc(ENTITY_HASH_SIZE * + sizeof(xmlEntityPtr)); + if (ret->table == NULL) { + fprintf(stderr, 
"xmlCopyEntitiesTable: out of memory !\n"); + xmlFree(ret); + return(NULL); + } +#else ret->table = (xmlEntityPtr *) xmlMalloc(table->max_entities * sizeof(xmlEntityPtr)); if (ret->table == NULL) { @@ -1087,32 +1238,23 @@ xmlCopyEntitiesTable(xmlEntitiesTablePtr table) { xmlFree(ret); return(NULL); } +#endif ret->max_entities = table->max_entities; ret->nb_entities = table->nb_entities; for (i = 0;i < ret->nb_entities;i++) { - cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity)); - if (cur == NULL) { - fprintf(stderr, "xmlCopyEntityTable: out of memory !\n"); - xmlFree(ret); - xmlFree(ret->table); - return(NULL); - } - memset(cur, 0, sizeof(xmlEntity)); - cur->type = XML_ELEMENT_DECL; - ret->table[i] = cur; ent = table->table[i]; - - cur->etype = ent->etype; - if (ent->name != NULL) - cur->name = xmlStrdup(ent->name); - if (ent->ExternalID != NULL) - cur->ExternalID = xmlStrdup(ent->ExternalID); - if (ent->SystemID != NULL) - cur->SystemID = xmlStrdup(ent->SystemID); - if (ent->content != NULL) - cur->content = xmlStrdup(ent->content); - if (ent->orig != NULL) - cur->orig = xmlStrdup(ent->orig); + if (ent == NULL) + cur = NULL; + else + cur = xmlCopyEntity(ent); + ret->table[i] = cur; +#ifdef ENTITY_HASH_SIZE + ent = ent->nexte; + while ((ent != NULL) && (cur != NULL)) { + cur->nexte = xmlCopyEntity(ent); + cur = cur->nexte; + } +#endif } return(ret); } @@ -1217,8 +1359,18 @@ xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) { if (table == NULL) return; +#ifdef ENTITY_HASH_SIZE + for (i = 0;i < ENTITY_HASH_SIZE;i++) { + cur = table->table[i]; + while (cur != NULL) { + xmlDumpEntityDecl(buf, cur); + cur = cur->nexte; + } + } +#else for (i = 0;i < table->nb_entities;i++) { cur = table->table[i]; xmlDumpEntityDecl(buf, cur); } +#endif } @@ -54,6 +54,8 @@ struct _xmlEntity { const xmlChar *ExternalID; /* External identifier for PUBLIC */ const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ + struct _xmlEntity *nexte; /* next entity in the hash 
table */ + #ifdef WITH_EXTRA_ENT_DETECT /* Referenced entities name stack */ xmlChar *ent; /* Current parsed Node */ diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h index 5b6af9fa..62e81e3d 100644 --- a/include/libxml/encoding.h +++ b/include/libxml/encoding.h @@ -133,15 +133,39 @@ struct _xmlCharEncodingHandler { #endif /* LIBXML_ICONV_ENABLED */ }; +/* + * Interfaces for encoding handlers + */ void xmlInitCharEncodingHandlers (void); void xmlCleanupCharEncodingHandlers (void); void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); -xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in, +xmlCharEncodingHandlerPtr + xmlGetCharEncodingHandler (xmlCharEncoding enc); +xmlCharEncodingHandlerPtr + xmlFindCharEncodingHandler (const char *name); + + +/* + * Interfaces for encoding names and aliases + */ +int xmlAddEncodingAlias (const char *name, + const char *alias); +int xmlDelEncodingAlias (const char *alias); +const char * + xmlGetEncodingAlias (const char *alias); +void xmlCleanupEncodingAliases (void); +xmlCharEncoding + xmlParseCharEncoding (const char* name); +const char* + xmlGetCharEncodingName (xmlCharEncoding enc); + +/* + * Interfaces directly used by the parsers. 
+ */ +xmlCharEncoding + xmlDetectCharEncoding (const unsigned char* in, int len); -xmlCharEncoding xmlParseCharEncoding (const char* name); -const char* xmlGetCharEncodingName (xmlCharEncoding enc); -xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc); -xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name); + int xmlCheckUTF8 (const unsigned char *utf); int xmlCharEncOutFunc (xmlCharEncodingHandler *handler, diff --git a/include/libxml/entities.h b/include/libxml/entities.h index c9bd0035..e3930425 100644 --- a/include/libxml/entities.h +++ b/include/libxml/entities.h @@ -54,6 +54,8 @@ struct _xmlEntity { const xmlChar *ExternalID; /* External identifier for PUBLIC */ const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ + struct _xmlEntity *nexte; /* next entity in the hash table */ + #ifdef WITH_EXTRA_ENT_DETECT /* Referenced entities name stack */ xmlChar *ent; /* Current parsed Node */ diff --git a/include/libxml/parser.h b/include/libxml/parser.h index 6d257c56..5c49eb4d 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -182,6 +182,7 @@ struct _xmlParserCtxt { actually an xmlCharEncoding */ int nodelen; /* Those two fields are there to */ int nodemem; /* Speed up large node parsing */ + int pedantic; /* signal pedantic warnings */ }; /** @@ -354,6 +355,7 @@ xmlDocPtr xmlParseFile (const char *filename); int xmlSubstituteEntitiesDefault(int val); int xmlKeepBlanksDefault (int val); void xmlStopParser (xmlParserCtxtPtr ctxt); +int xmlPedanticParserDefault(int val); /** * Recovery mode diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h index f0f7561c..8fd6ffc6 100644 --- a/include/libxml/parserInternals.h +++ b/include/libxml/parserInternals.h @@ -432,6 +432,12 @@ extern "C" { while ((*p) && (*(p) != '<')) (p)++ /** + * Not for the faint of heart + */ + +extern int xmlParserDebugEntities; + +/** * Parser context */ xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar 
*cur); diff --git a/include/libxml/valid.h b/include/libxml/valid.h index 6849b97b..ff7a9af1 100644 --- a/include/libxml/valid.h +++ b/include/libxml/valid.h @@ -67,6 +67,7 @@ struct _xmlElementTable { int nb_elements; /* number of elements stored */ int max_elements; /* maximum number of elements */ xmlElementPtr *table; /* the table of elements */ + int last; /* last element accessed */ }; /* @@ -48,6 +48,8 @@ #define XML_PARSER_BUFFER_SIZE 100 int xmlGetWarningsDefaultValue = 1; +int xmlParserDebugEntities = 0; + /* * List of XML prefixed PI allowed by W3C specs @@ -583,6 +585,7 @@ xmlParserInputShrink(xmlParserInputPtr in) { int xmlSubstituteEntitiesDefaultValue = 0; int xmlDoValidityCheckingDefaultValue = 0; +int xmlPedanticParserDefaultValue = 0; int xmlKeepBlanksDefaultValue = 1; xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str); @@ -1209,6 +1212,8 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { xmlChar xmlPopInput(xmlParserCtxtPtr ctxt) { if (ctxt->inputNr == 1) return(0); /* End of main Input */ + if (xmlParserDebugEntities) + fprintf(stderr, "Popping input %d\n", ctxt->inputNr); xmlFreeInputStream(inputPop(ctxt)); if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) @@ -1227,6 +1232,13 @@ xmlPopInput(xmlParserCtxtPtr ctxt) { void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { if (input == NULL) return; + + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + fprintf(stderr, "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + fprintf(stderr, "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); + } inputPush(ctxt, input); GROW; } @@ -1298,6 +1310,8 @@ xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, xmlCharEncoding enc) { xmlParserInputPtr inputStream; + if (xmlParserDebugEntities) + fprintf(stderr, "new input from I/O\n"); inputStream = xmlNewInputStream(ctxt); if (inputStream == NULL) { return(NULL); @@ 
-1334,6 +1348,8 @@ xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { ctxt->errNo = XML_ERR_INTERNAL_ERROR; return(NULL); } + if (xmlParserDebugEntities) + fprintf(stderr, "new input from entity: %s\n", entity->name); if (entity->content == NULL) { switch (entity->etype) { case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: @@ -1396,6 +1412,8 @@ xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { "internal: xmlNewStringInputStream string = NULL\n"); return(NULL); } + if (xmlParserDebugEntities) + fprintf(stderr, "new fixed input: %.30s\n", buffer); input = xmlNewInputStream(ctxt); if (input == NULL) { return(NULL); @@ -1421,6 +1439,8 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { xmlParserInputPtr inputStream; char *directory = NULL; + if (xmlParserDebugEntities) + fprintf(stderr, "new input from file: %s\n", filename); if (ctxt == NULL) return(NULL); buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); if (buf == NULL) { @@ -1544,6 +1564,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->wellFormed = 1; ctxt->valid = 1; ctxt->validate = xmlDoValidityCheckingDefaultValue; + ctxt->pedantic = xmlPedanticParserDefaultValue; ctxt->keepBlanks = xmlKeepBlanksDefaultValue; ctxt->vctxt.userData = ctxt; if (ctxt->validate) { @@ -2210,6 +2231,8 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { NEXT; name = xmlParseName(ctxt); + if (xmlParserDebugEntities) + fprintf(stderr, "PE Reference: %s\n", name); if (name == NULL) { ctxt->errNo = XML_ERR_PEREF_NO_NAME; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) @@ -2247,11 +2270,12 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * ... The declaration of a parameter entity must precede * any reference to it... 
*/ - if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { + if ((!ctxt->disableSAX) && + (ctxt->validate) && (ctxt->vctxt.error != NULL)) { ctxt->vctxt.error(ctxt->vctxt.userData, "PEReference: %%%s; not found\n", name); - } else - if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + } else if ((!ctxt->disableSAX) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) ctxt->sax->warning(ctxt->userData, "PEReference: %%%s; not found\n", name); ctxt->valid = 0; @@ -2369,6 +2393,8 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, NEXTL(l); } else if ((c == '&') && (ctxt->token != '&') && (what & XML_SUBSTITUTE_REF)) { + if (xmlParserDebugEntities) + fprintf(stderr, "decoding Entity Reference\n"); ent = xmlParseEntityRef(ctxt); if ((ent != NULL) && (ctxt->replaceEntities != 0)) { @@ -2397,6 +2423,8 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, * we break here to flush the current set of chars * parsed if any. We will be called back later. */ + if (xmlParserDebugEntities) + fprintf(stderr, "decoding PE Reference\n"); if (nbchars != 0) break; xmlParsePEReference(ctxt); @@ -2484,6 +2512,9 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, COPY_BUF(0,buffer,nbchars,val); } } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { + if (xmlParserDebugEntities) + fprintf(stderr, "String decoding Entity Reference: %.30s\n", + str); ent = xmlParseStringEntityRef(ctxt, &str); if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { if (ent->content != NULL) { @@ -2524,6 +2555,8 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, buffer[nbchars++] = ';'; } } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { + if (xmlParserDebugEntities) + fprintf(stderr, "String decoding PE Reference: %.30s\n", str); ent = xmlParseStringPEReference(ctxt, &str); if (ent != NULL) { xmlChar *rep; @@ -4003,6 +4036,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { while 
((RAW == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt); + GROW; c = CUR_CHAR(l); if (c == 0) { GROW; @@ -5642,12 +5676,14 @@ xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) { return; } SKIP_BLANKS; + GROW; while (RAW != '>') { const xmlChar *check = CUR_PTR; int type; int def; xmlChar *defaultValue = NULL; + GROW; tree = NULL; attrName = xmlParseName(ctxt); if (attrName == NULL) { @@ -6462,8 +6498,6 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) { * xmlParseConditionalSections * @ctxt: an XML parser context * - * TODO : Conditionnal section are not yet supported ! - * * [61] conditionalSect ::= includeSect | ignoreSect * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' @@ -6490,6 +6524,13 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { } else { NEXT; } + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + fprintf(stderr, "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + fprintf(stderr, "Entering INCLUDE Conditional Section\n"); + } + while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || (NXT(2) != '>'))) { const xmlChar *check = CUR_PTR; @@ -6522,6 +6563,13 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { break; } } + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + fprintf(stderr, "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + fprintf(stderr, "Leaving INCLUDE Conditional Section\n"); + } + } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') && (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) { int state; @@ -6538,12 +6586,19 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { } else { NEXT; } + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + fprintf(stderr, "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + fprintf(stderr, "Entering IGNORE Conditional Section\n"); + } /* * Parse up to the end of the 
conditionnal section * But disable SAX event generating DTD building in the meantime */ state = ctxt->disableSAX; + ctxt->disableSAX = 1; while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') || (NXT(2) != '>'))) { const xmlChar *check = CUR_PTR; @@ -6577,6 +6632,13 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) { } } ctxt->disableSAX = state; + if (xmlParserDebugEntities) { + if ((ctxt->input != NULL) && (ctxt->input->filename)) + fprintf(stderr, "%s(%d): ", ctxt->input->filename, + ctxt->input->line); + fprintf(stderr, "Leaving IGNORE Conditional Section\n"); + } + } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, @@ -6644,6 +6706,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, int cons = ctxt->input->consumed; int tok = ctxt->token; + GROW; if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { xmlParseConditionalSections(ctxt); } else if (IS_BLANK(CUR)) { @@ -7349,7 +7412,8 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) { if ((ctxt->standalone == 1) || ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + if ((!ctxt->disableSAX) && + (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "PEReference: %%%s; not found\n", name); ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; @@ -7363,7 +7427,8 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) { * ... The declaration of a parameter entity must precede * any reference to it... */ - if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + if ((!ctxt->disableSAX) && + (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) ctxt->sax->warning(ctxt->userData, "PEReference: %%%s; not found\n", name); ctxt->valid = 0; @@ -11472,6 +11537,23 @@ xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, /** + * xmlPedanticParserDefault: + * @val: int 0 or 1 + * + * Set and return the previous value for enabling pedantic warnings. 
+ * + * Returns the previous value: 0 for no pedantic warnings, 1 for pedantic warnings. + */ + +int +xmlPedanticParserDefault(int val) { + int old = xmlPedanticParserDefaultValue; + + xmlPedanticParserDefaultValue = val; + return(old); +} + +/** + * xmlSubstituteEntitiesDefault: * @val: int 0 or 1 * @@ -182,6 +182,7 @@ struct _xmlParserCtxt { actually an xmlCharEncoding */ int nodelen; /* Those two fields are there to */ int nodemem; /* Speed up large node parsing */ + int pedantic; /* signal pedantic warnings */ }; /** @@ -354,6 +355,7 @@ xmlDocPtr xmlParseFile (const char *filename); int xmlSubstituteEntitiesDefault(int val); int xmlKeepBlanksDefault (int val); void xmlStopParser (xmlParserCtxtPtr ctxt); +int xmlPedanticParserDefault(int val); /** * Recovery mode diff --git a/parserInternals.h b/parserInternals.h index f0f7561c..8fd6ffc6 100644 --- a/parserInternals.h +++ b/parserInternals.h @@ -432,6 +432,12 @@ extern "C" { while ((*p) && (*(p) != '<')) (p)++ /** + * Not for the faint of heart + */ + +extern int xmlParserDebugEntities; + +/** * Parser context */ xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); @@ -572,7 +572,8 @@ xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct; ************************************************************************/ void parseSAXFile(char *filename) { - htmlDocPtr doc; + htmlDocPtr doc = NULL; + /* * Empty callbacks for checking */ @@ -1024,14 +1024,15 @@ xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash) { *str = cur; return(-1); } - path[len] = '\0'; if (uri->path != NULL) memcpy(path, uri->path, len2); if (slash) { path[len2] = '/'; len2++; } - xmlURIUnescapeString(*str, cur - *str, &path[len2]); + path[len2] = 0; + if (cur - *str > 0) + xmlURIUnescapeString(*str, cur - *str, &path[len2]); if (uri->path != NULL) xmlFree(uri->path); uri->path = path; @@ -463,6 +463,7 @@ xmlCreateElementTable(void) { } ret->max_elements = XML_MIN_ELEMENT_TABLE; ret->nb_elements = 0; + ret->last = 0; ret->table = 
(xmlElementPtr *) xmlMalloc(ret->max_elements * sizeof(xmlElementPtr)); if (ret->table == NULL) { @@ -978,6 +979,7 @@ xmlScanIDAttributeDecl(xmlValidCtxtPtr ctxt, xmlElementPtr elem) { * @tree: if it's an enumeration, the associated list * * Register a new attribute declaration + * Note that @tree becomes owned by the DTD * * Returns NULL if not new, othervise the attribute decl */ @@ -993,14 +995,17 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *elem, if (dtd == NULL) { fprintf(stderr, "xmlAddAttributeDecl: dtd == NULL\n"); + xmlFreeEnumeration(tree); return(NULL); } if (name == NULL) { fprintf(stderr, "xmlAddAttributeDecl: name == NULL\n"); + xmlFreeEnumeration(tree); return(NULL); } if (elem == NULL) { fprintf(stderr, "xmlAddAttributeDecl: elem == NULL\n"); + xmlFreeEnumeration(tree); return(NULL); } /* @@ -1029,6 +1034,7 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *elem, break; default: fprintf(stderr, "xmlAddAttributeDecl: unknown type %d\n", type); + xmlFreeEnumeration(tree); return(NULL); } if ((defaultValue != NULL) && @@ -1054,19 +1060,59 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *elem, /* * Validity Check: * Search the DTD for previous declarations of the ATTLIST + * The initial code used to walk the attribute table comparing + * all pairs of element/attribute names, and was far too slow + * for large DTDs; we now walk the attribute list associated with + * the element declaration instead, if this declaration is found. */ - for (i = 0;i < table->nb_attributes;i++) { - cur = table->table[i]; - if ((ns != NULL) && (cur->prefix == NULL)) continue; - if ((ns == NULL) && (cur->prefix != NULL)) continue; - if ((!xmlStrcmp(cur->name, name)) && (!xmlStrcmp(cur->elem, elem)) && - ((ns == NULL) || (!xmlStrcmp(cur->prefix, ns)))) { - /* - * The attribute is already defined in this Dtd. 
- */ - VWARNING(ctxt->userData, "Attribute %s on %s: already defined\n", - elem, name); - return(NULL); + elemDef = xmlGetDtdElementDesc(dtd, elem); + if (elemDef != NULL) { + /* + * follow the attribute list. + */ + cur = elemDef->attributes; + while (cur != NULL) { + if ((ns != NULL) && (cur->prefix == NULL)) { + cur = cur->nexth; + continue; + } + if ((ns == NULL) && (cur->prefix != NULL)) { + cur = cur->nexth; + continue; + } + if ((!xmlStrcmp(cur->name, name)) && + ((ns == NULL) || (!xmlStrcmp(cur->prefix, ns)))) { + /* + * The attribute is already defined in this Dtd. + */ + VWARNING(ctxt->userData, + "Attribute %s on %s: already defined\n", + name, elem); + xmlFreeEnumeration(tree); + return(NULL); + } + cur = cur->nexth; + } + } else { + /* + * Walk down the attribute table. + */ + for (i = 0;i < table->nb_attributes;i++) { + cur = table->table[i]; + if ((ns != NULL) && (cur->prefix == NULL)) continue; + if ((ns == NULL) && (cur->prefix != NULL)) continue; + if ((!xmlStrcmp(cur->name, name)) && + (!xmlStrcmp(cur->elem, elem)) && + ((ns == NULL) || (!xmlStrcmp(cur->prefix, ns)))) { + /* + * The attribute is already defined in this Dtd. 
+ */ + VWARNING(ctxt->userData, + "Attribute %s on %s: already defined\n", + elem, name); + xmlFreeEnumeration(tree); + return(NULL); + } } } @@ -1106,7 +1152,6 @@ xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd, const xmlChar *elem, ret->tree = tree; if (defaultValue != NULL) ret->defaultValue = xmlStrdup(defaultValue); - elemDef = xmlGetDtdElementDesc(dtd, elem); if (elemDef != NULL) { if ((type == XML_ATTRIBUTE_ID) && (xmlScanIDAttributeDecl(NULL, elemDef) != 0)) @@ -2166,10 +2211,17 @@ xmlGetDtdElementDesc(xmlDtdPtr dtd, const xmlChar *name) { if (dtd->elements == NULL) return(NULL); table = (xmlElementTablePtr) dtd->elements; + if ((table->last >= 0) && (table->last < table->nb_elements)) { + cur = table->table[table->last]; + if (!xmlStrcmp(cur->name, name)) + return(cur); + } for (i = 0;i < table->nb_elements;i++) { cur = table->table[i]; - if (!xmlStrcmp(cur->name, name)) + if (!xmlStrcmp(cur->name, name)) { + table->last = i; return(cur); + } } /* @@ -67,6 +67,7 @@ struct _xmlElementTable { int nb_elements; /* number of elements stored */ int max_elements; /* maximum number of elements */ xmlElementPtr *table; /* the table of elements */ + int last; /* last element accessed */ }; /* @@ -564,7 +564,7 @@ void parseAndPrintFile(char *filename) { #ifdef LIBXML_DEBUG_ENABLED if ((debugent) && (!html)) - xmlDebugDumpEntities(stdout, doc); + xmlDebugDumpEntities(stderr, doc); #endif /* @@ -582,8 +582,6 @@ int main(int argc, char **argv) { #ifdef LIBXML_DEBUG_ENABLED if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) debug++; - else if ((!strcmp(argv[i], "-debugent")) || (!strcmp(argv[i], "--debugent"))) - debugent++; else if ((!strcmp(argv[i], "-shell")) || (!strcmp(argv[i], "--shell"))) { shell++; @@ -644,11 +642,26 @@ int main(int argc, char **argv) { else if ((!strcmp(argv[i], "-nowarning")) || (!strcmp(argv[i], "--nowarning"))) { xmlGetWarningsDefaultValue = 0; + xmlPedanticParserDefault(0); + } + else if ((!strcmp(argv[i], 
"-pedantic")) || + (!strcmp(argv[i], "--pedantic"))) { + xmlGetWarningsDefaultValue = 1; + xmlPedanticParserDefault(1); } + else if ((!strcmp(argv[i], "-debugent")) || + (!strcmp(argv[i], "--debugent"))) { + debugent++; + xmlParserDebugEntities = 1; + } else if ((!strcmp(argv[i], "-encode")) || (!strcmp(argv[i], "--encode"))) { i++; encoding = argv[i]; + /* + * OK it's for testing purposes + */ + xmlAddEncodingAlias("UTF-8", "DVEnc"); } else if ((!strcmp(argv[i], "-noblanks")) || (!strcmp(argv[i], "--noblanks"))) { |