/* * parser.c : an XML 1.0 parser, namespaces and validity support are mostly * implemented on top of the SAX interfaces * * References: * The XML specification: * http://www.w3.org/TR/REC-xml * Original 1.0 version: * http://www.w3.org/TR/1998/REC-xml-19980210 * XML second edition working draft * http://www.w3.org/TR/2000/WD-xml-2e-20000814 * * Okay this is a big file, the parser core is around 7000 lines, then it * is followed by the progressive parser top routines, then the various * high level APIs to call the parser and a few miscellaneous functions. * A number of helper functions and deprecated ones have been moved to * parserInternals.c to reduce this file size. * As much as possible the functions are associated with their relative * production in the XML specification. A few productions defining the * different ranges of character are actually implanted either in * parserInternals.h or parserInternals.c * The DOM tree build is realized from the default SAX callbacks in * the module SAX.c. * The routines doing the validation checks are in valid.c and called either * from the SAX callbacks or as standalone functions using a preparsed * document. * * See Copyright for the status of this software. * * daniel@veillard.com */ #define IN_LIBXML #include "libxml.h" #if defined(WIN32) && !defined (__CYGWIN__) #define XML_DIR_SEP '\\' #else #define XML_DIR_SEP '/' #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef LIBXML_CATALOG_ENABLED #include #endif #ifdef HAVE_CTYPE_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_FCNTL_H #include #endif #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_ZLIB_H #include #endif #define XML_PARSER_BIG_BUFFER_SIZE 300 #define XML_PARSER_BUFFER_SIZE 100 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" /* * List of XML prefixed PI allowed by W3C specs */ static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL }; /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str); static int xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, xmlSAXHandlerPtr sax, void *user_data, int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list); static void xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, xmlNodePtr lastNode); /************************************************************************ * * * Parser stacks related functions and macros * * * ************************************************************************/ xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str); /* * Generic function for accessing stacks in the Parser Context */ #define PUSH_AND_POP(scope, type, name) \ scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ if (ctxt->name##Nr >= ctxt->name##Max) { \ ctxt->name##Max *= 2; \ ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \ ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ if (ctxt->name##Tab == NULL) { \ xmlGenericError(xmlGenericErrorContext, \ "realloc failed !\n"); \ return(0); \ } \ } \ ctxt->name##Tab[ctxt->name##Nr] = value; \ ctxt->name = value; \ return(ctxt->name##Nr++); \ } \ scope type name##Pop(xmlParserCtxtPtr ctxt) { \ type ret; \ if (ctxt->name##Nr <= 0) return(0); \ ctxt->name##Nr--; \ if (ctxt->name##Nr > 0) \ ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ else \ ctxt->name = NULL; \ ret = ctxt->name##Tab[ctxt->name##Nr]; \ ctxt->name##Tab[ctxt->name##Nr] = 0; \ return(ret); \ } \ /** * inputPop: * @ctxt: an XML parser context * * Pops the top parser input from the input stack * * Returns the input just removed */ /** * inputPush: * @ctxt: an XML parser context * @value: the parser input * * Pushes a new parser input on top of the input stack * * Returns 0 in case of error, the index in the stack otherwise */ /** * namePop: * @ctxt: an XML parser context * * Pops the top element name from the name stack * * Returns the name just removed */ /** * namePush: * @ctxt: an XML parser context * @value: the element name * * Pushes a new element name on top of the name stack * * Returns 0 in case of error, the index in the stack otherwise */ /** * nodePop: * @ctxt: an XML parser context * * Pops the top element node from the node stack * * Returns the node just removed */ /** * nodePush: * @ctxt: an XML parser context * @value: the element node * * Pushes a new element node on top of the node stack * * Returns 0 in case of error, the index in the stack otherwise */ /* * Those macros actually generate the functions */ PUSH_AND_POP(extern, xmlParserInputPtr, input) PUSH_AND_POP(extern, xmlNodePtr, node) PUSH_AND_POP(extern, xmlChar*, name) static int spacePush(xmlParserCtxtPtr ctxt, int val) { if (ctxt->spaceNr >= ctxt->spaceMax) { ctxt->spaceMax *= 2; ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); if (ctxt->spaceTab == NULL) { xmlGenericError(xmlGenericErrorContext, "realloc failed !\n"); return(0); } } ctxt->spaceTab[ctxt->spaceNr] = val; ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; return(ctxt->spaceNr++); } static int spacePop(xmlParserCtxtPtr ctxt) { int ret; if (ctxt->spaceNr <= 0) return(0); ctxt->spaceNr--; if (ctxt->spaceNr > 0) ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; else ctxt->space = NULL; ret = ctxt->spaceTab[ctxt->spaceNr]; ctxt->spaceTab[ctxt->spaceNr] = -1; return(ret); } /* * Macros for accessing the content. Those should be used only by the parser, * and not exported. * * Dirty macros, i.e. one often need to make assumption on the context to * use them * * CUR_PTR return the current pointer to the xmlChar to be parsed. * To be used with extreme caution since operations consuming * characters may move the input buffer to a different location ! * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled * This should be used internally by the parser * only to compare to ASCII values otherwise it would break when * running with UTF-8 encoding. * RAW same as CUR but in the input buffer, bypass any token * extraction that may have been done * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only * to compare on ASCII based substring. * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined * strings within the parser. * * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding * * NEXT Skip to the next character, this does the proper decoding * in UTF-8 mode. It also pop-up unfinished entities on the fly. * NEXTL(l) Skip l xmlChar in the input buffer * CUR_CHAR(l) returns the current unicode character (int), set l * to the number of xmlChars used for the encoding [0-5]. * CUR_SCHAR same but operate on a string instead of the context * COPY_BUF copy the current unicode char to the target buffer, increment * the index * GROW, SHRINK handling of input buffers */ #define RAW (*ctxt->input->cur) #define CUR (*ctxt->input->cur) #define NXT(val) ctxt->input->cur[(val)] #define CUR_PTR ctxt->input->cur #define SKIP(val) do { \ ctxt->nbChars += (val),ctxt->input->cur += (val); \ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ if ((*ctxt->input->cur == 0) && \ (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ xmlPopInput(ctxt); \ } while (0) #define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) \ xmlSHRINK (ctxt); static void xmlSHRINK (xmlParserCtxtPtr ctxt) { xmlParserInputShrink(ctxt->input); if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) xmlPopInput(ctxt); } #define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \ xmlGROW (ctxt); static void xmlGROW (xmlParserCtxtPtr ctxt) { xmlParserInputGrow(ctxt->input, INPUT_CHUNK); if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) xmlPopInput(ctxt); } #define SKIP_BLANKS xmlSkipBlankChars(ctxt) #define NEXT xmlNextChar(ctxt) #define NEXT1 { \ ctxt->input->cur++; \ ctxt->nbChars++; \ if (*ctxt->input->cur == 0) \ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ } #define NEXTL(l) do { \ if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ } else ctxt->input->col++; \ ctxt->input->cur += l; \ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ } while (0) #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) #define COPY_BUF(l,b,i,v) \ if (l == 1) b[i++] = (xmlChar) v; \ else i += xmlCopyCharMultiByte(&b[i],v) /** * xmlSkipBlankChars: * @ctxt: the XML parser context * * skip all blanks character found at that point in the input streams. * It pops up finished entities in the process if allowable at that point. * * Returns the number of space chars skipped */ int xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { int res = 0; /* * It's Okay to use CUR/NEXT here since all the blanks are on * the ASCII range. */ if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { const xmlChar *cur; /* * if we are in the document content, go really fast */ cur = ctxt->input->cur; while (IS_BLANK(*cur)) { if (*cur == '\n') { ctxt->input->line++; ctxt->input->col = 1; } cur++; res++; if (*cur == 0) { ctxt->input->cur = cur; xmlParserInputGrow(ctxt->input, INPUT_CHUNK); cur = ctxt->input->cur; } } ctxt->input->cur = cur; } else { int cur; do { cur = CUR; while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ NEXT; cur = CUR; res++; } while ((cur == 0) && (ctxt->inputNr > 1) && (ctxt->instate != XML_PARSER_COMMENT)) { xmlPopInput(ctxt); cur = CUR; } /* * Need to handle support of entities branching here */ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ } return(res); } /************************************************************************ * * * Commodity functions to handle entities * * * ************************************************************************/ /** * xmlPopInput: * @ctxt: an XML parser context * * xmlPopInput: the current input pointed by ctxt->input came to an end * pop it and return the next char. * * Returns the current xmlChar in the parser context */ xmlChar xmlPopInput(xmlParserCtxtPtr ctxt) { if (ctxt->inputNr == 1) return(0); /* End of main Input */ if (xmlParserDebugEntities) xmlGenericError(xmlGenericErrorContext, "Popping input %d\n", ctxt->inputNr); xmlFreeInputStream(inputPop(ctxt)); if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) return(xmlPopInput(ctxt)); return(CUR); } /** * xmlPushInput: * @ctxt: an XML parser context * @input: an XML parser input fragment (entity, XML fragment ...). * * xmlPushInput: switch to a new input stream which is stacked on top * of the previous one(s). */ void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { if (input == NULL) return; if (xmlParserDebugEntities) { if ((ctxt->input != NULL) && (ctxt->input->filename)) xmlGenericError(xmlGenericErrorContext, "%s(%d): ", ctxt->input->filename, ctxt->input->line); xmlGenericError(xmlGenericErrorContext, "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); } inputPush(ctxt, input); GROW; } /** * xmlParseCharRef: * @ctxt: an XML parser context * * parse Reference declarations * * [66] CharRef ::= '&#' [0-9]+ ';' | * '&#x' [0-9a-fA-F]+ ';' * * [ WFC: Legal Character ] * Characters referred to using character references must match the * production for Char. * * Returns the value parsed (as an int), 0 in case of error */ int xmlParseCharRef(xmlParserCtxtPtr ctxt) { unsigned int val = 0; int count = 0; /* * Using RAW/CUR/NEXT is okay since we are working on ASCII range here */ if ((RAW == '&') && (NXT(1) == '#') && (NXT(2) == 'x')) { SKIP(3); GROW; while (RAW != ';') { /* loop blocked by count */ if (count++ > 20) { count = 0; GROW; } if ((RAW >= '0') && (RAW <= '9')) val = val * 16 + (CUR - '0'); else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) val = val * 16 + (CUR - 'a') + 10; else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) val = val * 16 + (CUR - 'A') + 10; else { ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid hexadecimal value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; val = 0; break; } NEXT; count++; } if (RAW == ';') { /* on purpose to avoid reentrancy problems with NEXT and SKIP */ ctxt->nbChars ++; ctxt->input->cur++; } } else if ((RAW == '&') && (NXT(1) == '#')) { SKIP(2); GROW; while (RAW != ';') { /* loop blocked by count */ if (count++ > 20) { count = 0; GROW; } if ((RAW >= '0') && (RAW <= '9')) val = val * 10 + (CUR - '0'); else { ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid decimal value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; val = 0; break; } NEXT; count++; } if (RAW == ';') { /* on purpose to avoid reentrancy problems with NEXT and SKIP */ ctxt->nbChars ++; ctxt->input->cur++; } } else { ctxt->errNo = XML_ERR_INVALID_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } /* * [ WFC: Legal Character ] * Characters referred to using character references must match the * production for Char. */ if (IS_CHAR(val)) { return(val); } else { ctxt->errNo = XML_ERR_INVALID_CHAR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid xmlChar value %d\n", val); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } return(0); } /** * xmlParseStringCharRef: * @ctxt: an XML parser context * @str: a pointer to an index in the string * * parse Reference declarations, variant parsing from a string rather * than an an input flow. * * [66] CharRef ::= '&#' [0-9]+ ';' | * '&#x' [0-9a-fA-F]+ ';' * * [ WFC: Legal Character ] * Characters referred to using character references must match the * production for Char. * * Returns the value parsed (as an int), 0 in case of error, str will be * updated to the current value of the index */ static int xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { const xmlChar *ptr; xmlChar cur; int val = 0; if ((str == NULL) || (*str == NULL)) return(0); ptr = *str; cur = *ptr; if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { ptr += 3; cur = *ptr; while (cur != ';') { /* Non input consuming loop */ if ((cur >= '0') && (cur <= '9')) val = val * 16 + (cur - '0'); else if ((cur >= 'a') && (cur <= 'f')) val = val * 16 + (cur - 'a') + 10; else if ((cur >= 'A') && (cur <= 'F')) val = val * 16 + (cur - 'A') + 10; else { ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseStringCharRef: invalid hexadecimal value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; val = 0; break; } ptr++; cur = *ptr; } if (cur == ';') ptr++; } else if ((cur == '&') && (ptr[1] == '#')){ ptr += 2; cur = *ptr; while (cur != ';') { /* Non input consuming loops */ if ((cur >= '0') && (cur <= '9')) val = val * 10 + (cur - '0'); else { ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseStringCharRef: invalid decimal value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; val = 0; break; } ptr++; cur = *ptr; } if (cur == ';') ptr++; } else { ctxt->errNo = XML_ERR_INVALID_CHARREF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseStringCharRef: invalid value\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(0); } *str = ptr; /* * [ WFC: Legal Character ] * Characters referred to using character references must match the * production for Char. */ if (IS_CHAR(val)) { return(val); } else { ctxt->errNo = XML_ERR_INVALID_CHAR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseStringCharRef: invalid xmlChar value %d\n", val); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } return(0); } /** * xmlNewBlanksWrapperInputStream: * @ctxt: an XML parser context * @entity: an Entity pointer * * Create a new input stream for wrapping * blanks around a PEReference * * Returns the new input stream or NULL */ static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} static xmlParserInputPtr xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { xmlParserInputPtr input; xmlChar *buffer; size_t length; if (entity == NULL) { ctxt->errNo = XML_ERR_INTERNAL_ERROR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "internal: xmlNewBlanksWrapperInputStream entity = NULL\n"); ctxt->errNo = XML_ERR_INTERNAL_ERROR; return(NULL); } if (xmlParserDebugEntities) xmlGenericError(xmlGenericErrorContext, "new blanks wrapper for entity: %s\n", entity->name); input = xmlNewInputStream(ctxt); if (input == NULL) { return(NULL); } length = xmlStrlen(entity->name) + 5; buffer = xmlMalloc(length); if (buffer == NULL) { return(NULL); } buffer [0] = ' '; buffer [1] = '%'; buffer [length-3] = ';'; buffer [length-2] = ' '; buffer [length-1] = 0; memcpy(buffer + 2, entity->name, length - 5); input->free = deallocblankswrapper; input->base = buffer; input->cur = buffer; input->length = length; input->end = &buffer[length]; return(input); } /** * xmlParserHandlePEReference: * @ctxt: the parser context * * [69] PEReference ::= '%' Name ';' * * [ WFC: No Recursion ] * A parsed entity must not contain a recursive * reference to itself, either directly or indirectly. * * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an internal DTD * subset which contains no parameter entity references, or a document * with "standalone='yes'", ... ... The declaration of a parameter * entity must precede any reference to it... * * [ VC: Entity Declared ] * In a document with an external subset or external parameter entities * with "standalone='no'", ... ... The declaration of a parameter entity * must precede any reference to it... * * [ WFC: In DTD ] * Parameter-entity references may only appear in the DTD. * NOTE: misleading but this is handled. * * A PEReference may have been detected in the current input stream * the handling is done accordingly to * http://www.w3.org/TR/REC-xml#entproc * i.e. * - Included in literal in entity values * - Included as Parameter Entity reference within DTDs */ void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { xmlChar *name; xmlEntityPtr entity = NULL; xmlParserInputPtr input; if (RAW != '%') return; switch(ctxt->instate) { case XML_PARSER_CDATA_SECTION: return; case XML_PARSER_COMMENT: return; case XML_PARSER_START_TAG: return; case XML_PARSER_END_TAG: return; case XML_PARSER_EOF: ctxt->errNo = XML_ERR_PEREF_AT_EOF; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return; case XML_PARSER_PROLOG: case XML_PARSER_START: case XML_PARSER_MISC: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return; case XML_PARSER_ENTITY_DECL: case XML_PARSER_CONTENT: case XML_PARSER_ATTRIBUTE_VALUE: case XML_PARSER_PI: case XML_PARSER_SYSTEM_LITERAL: case XML_PARSER_PUBLIC_LITERAL: /* we just ignore it there */ return; case XML_PARSER_EPILOG: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return; case XML_PARSER_ENTITY_VALUE: /* * NOTE: in the case of entity values, we don't do the * substitution here since we need the literal * entity value to be able to save the internal * subset of the document. * This will be handled by xmlStringDecodeEntities */ return; case XML_PARSER_DTD: /* * [WFC: Well-Formedness Constraint: PEs in Internal Subset] * In the internal DTD subset, parameter-entity references * can occur only where markup declarations can occur, not * within markup declarations. * In that case this is handled in xmlParseMarkupDecl */ if ((ctxt->external == 0) && (ctxt->inputNr == 1)) return; if (IS_BLANK(NXT(1)) || NXT(1) == 0) return; break; case XML_PARSER_IGNORE: return; } NEXT; name = xmlParseName(ctxt); if (xmlParserDebugEntities) xmlGenericError(xmlGenericErrorContext, "PEReference: %s\n", name); if (name == NULL) { ctxt->errNo = XML_ERR_PEREF_NO_NAME; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else { if (RAW == ';') { NEXT; if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) entity = ctxt->sax->getParameterEntity(ctxt->userData, name); if (entity == NULL) { /* * [ WFC: Entity Declared ] * In a document without any DTD, a document with only an * internal DTD subset which contains no parameter entity * references, or a document with "standalone='yes'", ... * ... The declaration of a parameter entity must precede * any reference to it... */ if ((ctxt->standalone == 1) || ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "PEReference: %%%s; not found\n", name); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else { /* * [ VC: Entity Declared ] * In a document with an external subset or external * parameter entities with "standalone='no'", ... * ... The declaration of a parameter entity must precede * any reference to it... */ if ((!ctxt->disableSAX) && (ctxt->validate) && (ctxt->vctxt.error != NULL)) { ctxt->vctxt.error(ctxt->vctxt.userData, "PEReference: %%%s; not found\n", name); } else if ((!ctxt->disableSAX) && (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) ctxt->sax->warning(ctxt->userData, "PEReference: %%%s; not found\n", name); ctxt->valid = 0; } } else if (ctxt->input->free != deallocblankswrapper) { input = xmlNewBlanksWrapperInputStream(ctxt, entity); xmlPushInput(ctxt, input); } else { if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { xmlChar start[4]; xmlCharEncoding enc; /* * handle the extra spaces added before and after * c.f. http://www.w3.org/TR/REC-xml#as-PE * this is done independently. */ input = xmlNewEntityInputStream(ctxt, entity); xmlPushInput(ctxt, input); /* * Get the 4 first bytes and decode the charset * if enc != XML_CHAR_ENCODING_NONE * plug some encoding conversion routines. */ GROW if (entity->length >= 4) { start[0] = RAW; start[1] = NXT(1); start[2] = NXT(2); start[3] = NXT(3); enc = xmlDetectCharEncoding(start, 4); if (enc != XML_CHAR_ENCODING_NONE) { xmlSwitchEncoding(ctxt, enc); } } if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && (RAW == '<') && (NXT(1) == '?') && (NXT(2) == 'x') && (NXT(3) == 'm') && (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { xmlParseTextDecl(ctxt); } } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: %s is not a parameter entity\n", name); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } } } else { ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: expecting ';'\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } xmlFree(name); } } /* * Macro used to grow the current buffer. */ #define growBuffer(buffer) { \ buffer##_size *= 2; \ buffer = (xmlChar *) \ xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \ if (buffer == NULL) { \ xmlGenericError(xmlGenericErrorContext, "realloc failed"); \ return(NULL); \ } \ } /** * xmlStringDecodeEntities: * @ctxt: the parser context * @str: the input string * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF * @end: an end marker xmlChar, 0 if none * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none * * Takes a entity string content and process to do the adequate substitutions. * * [67] Reference ::= EntityRef | CharRef * * [69] PEReference ::= '%' Name ';' * * Returns A newly allocated string with the substitution done. The caller * must deallocate it ! */ xmlChar * xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, xmlChar end, xmlChar end2, xmlChar end3) { xmlChar *buffer = NULL; int buffer_size = 0; xmlChar *current = NULL; xmlEntityPtr ent; int c,l; int nbchars = 0; if (str == NULL) return(NULL); if (ctxt->depth > 40) { ctxt->errNo = XML_ERR_ENTITY_LOOP; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Detected entity reference loop\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(NULL); } /* * allocate a translation buffer. */ buffer_size = XML_PARSER_BIG_BUFFER_SIZE; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { xmlGenericError(xmlGenericErrorContext, "xmlStringDecodeEntities: malloc failed"); return(NULL); } /* * OK loop until we reach one of the ending char or a size limit. * we are operating on already parsed values. */ c = CUR_SCHAR(str, l); while ((c != 0) && (c != end) && /* non input consuming loop */ (c != end2) && (c != end3)) { if (c == 0) break; if ((c == '&') && (str[1] == '#')) { int val = xmlParseStringCharRef(ctxt, &str); if (val != 0) { COPY_BUF(0,buffer,nbchars,val); } } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { if (xmlParserDebugEntities) xmlGenericError(xmlGenericErrorContext, "String decoding Entity Reference: %.30s\n", str); ent = xmlParseStringEntityRef(ctxt, &str); if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { if (ent->content != NULL) { COPY_BUF(0,buffer,nbchars,ent->content[0]); } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "internal error entity has no content\n"); } } else if ((ent != NULL) && (ent->content != NULL)) { xmlChar *rep; ctxt->depth++; rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); ctxt->depth--; if (rep != NULL) { current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } } xmlFree(rep); } } else if (ent != NULL) { int i = xmlStrlen(ent->name); const xmlChar *cur = ent->name; buffer[nbchars++] = '&'; if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } for (;i > 0;i--) buffer[nbchars++] = *cur++; buffer[nbchars++] = ';'; } } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { if (xmlParserDebugEntities) xmlGenericError(xmlGenericErrorContext, "String decoding PE Reference: %.30s\n", str); ent = xmlParseStringPEReference(ctxt, &str); if (ent != NULL) { xmlChar *rep; ctxt->depth++; rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); ctxt->depth--; if (rep != NULL) { current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } } xmlFree(rep); } } } else { COPY_BUF(l,buffer,nbchars,c); str += l; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } } c = CUR_SCHAR(str, l); } buffer[nbchars++] = 0; return(buffer); } /************************************************************************ * * * Commodity functions to handle xmlChars * * * ************************************************************************/ /** * xmlStrndup: * @cur: the input xmlChar * * @len: the len of @cur * * a strndup for array of xmlChar's * * Returns a new xmlChar * or NULL */ xmlChar * xmlStrndup(const xmlChar *cur, int len) { xmlChar *ret; if ((cur == NULL) || (len < 0)) return(NULL); ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); if (ret == NULL) { xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", (len + 1) * (long)sizeof(xmlChar)); return(NULL); } memcpy(ret, cur, len * sizeof(xmlChar)); ret[len] = 0; return(ret); } /** * xmlStrdup: * @cur: the input xmlChar * * * a strdup for array of xmlChar's. Since they are supposed to be * encoded in UTF-8 or an encoding with 8bit based chars, we assume * a termination mark of '0'. * * Returns a new xmlChar * or NULL */ xmlChar * xmlStrdup(const xmlChar *cur) { const xmlChar *p = cur; if (cur == NULL) return(NULL); while (*p != 0) p++; /* non input consuming */ return(xmlStrndup(cur, p - cur)); } /** * xmlCharStrndup: * @cur: the input char * * @len: the len of @cur * * a strndup for char's to xmlChar's * * Returns a new xmlChar * or NULL */ xmlChar * xmlCharStrndup(const char *cur, int len) { int i; xmlChar *ret; if ((cur == NULL) || (len < 0)) return(NULL); ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); if (ret == NULL) { xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", (len + 1) * (long)sizeof(xmlChar)); return(NULL); } for (i = 0;i < len;i++) ret[i] = (xmlChar) cur[i]; ret[len] = 0; return(ret); } /** * xmlCharStrdup: * @cur: the input char * * @len: the len of @cur * * a strdup for char's to xmlChar's * * Returns a new xmlChar * or NULL */ xmlChar * xmlCharStrdup(const char *cur) { const char *p = cur; if (cur == NULL) return(NULL); while (*p != '\0') p++; /* non input consuming */ return(xmlCharStrndup(cur, p - cur)); } /** * xmlStrcmp: * @str1: the first xmlChar * * @str2: the second xmlChar * * * a strcmp for xmlChar's * * Returns the integer result of the comparison */ int xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { register int tmp; if (str1 == str2) return(0); if (str1 == NULL) return(-1); if (str2 == NULL) return(1); do { tmp = *str1++ - *str2; if (tmp != 0) return(tmp); } while (*str2++ != 0); return 0; } /** * xmlStrEqual: * @str1: the first xmlChar * * @str2: the second xmlChar * * * Check if both string are equal of have same content * Should be a bit more readable and faster than xmlStrEqual() * * Returns 1 if they are equal, 0 if they are different */ int xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { if (str1 == str2) return(1); if (str1 == NULL) return(0); if (str2 == NULL) return(0); do { if (*str1++ != *str2) return(0); } while (*str2++); return(1); } /** * xmlStrncmp: * @str1: the first xmlChar * * @str2: the second xmlChar * * @len: the max comparison length * * a strncmp for xmlChar's * * Returns the integer result of the comparison */ int xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { register int tmp; if (len <= 0) return(0); if (str1 == str2) return(0); if (str1 == NULL) return(-1); if (str2 == NULL) return(1); do { tmp = *str1++ - *str2; if (tmp != 0 || --len == 0) return(tmp); } while (*str2++ != 0); return 0; } static const xmlChar casemap[256] = { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF }; /** * xmlStrcasecmp: * @str1: the first xmlChar * * @str2: the second xmlChar * * * a strcasecmp for xmlChar's * * Returns the integer result of the comparison */ int xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { register int tmp; if (str1 == str2) return(0); if (str1 == NULL) return(-1); if (str2 == NULL) return(1); do { tmp = casemap[*str1++] - casemap[*str2]; if (tmp != 0) return(tmp); } while (*str2++ != 0); return 0; } /** * xmlStrncasecmp: * @str1: the first xmlChar * * @str2: the second xmlChar * * @len: the max comparison length * * a strncasecmp for xmlChar's * * Returns the integer result of the comparison */ int xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { register int tmp; if (len <= 0) return(0); if (str1 == str2) return(0); if (str1 == NULL) return(-1); if (str2 == NULL) return(1); do { tmp = casemap[*str1++] - casemap[*str2]; if (tmp != 0 || --len == 0) return(tmp); } while (*str2++ != 0); return 0; } /** * xmlStrchr: * @str: the xmlChar * array * @val: the xmlChar to search * * a strchr for xmlChar's * * Returns the xmlChar * for the first occurrence or NULL. */ const xmlChar * xmlStrchr(const xmlChar *str, xmlChar val) { if (str == NULL) return(NULL); while (*str != 0) { /* non input consuming */ if (*str == val) return((xmlChar *) str); str++; } return(NULL); } /** * xmlStrstr: * @str: the xmlChar * array (haystack) * @val: the xmlChar to search (needle) * * a strstr for xmlChar's * * Returns the xmlChar * for the first occurrence or NULL. */ const xmlChar * xmlStrstr(const xmlChar *str, const xmlChar *val) { int n; if (str == NULL) return(NULL); if (val == NULL) return(NULL); n = xmlStrlen(val); if (n == 0) return(str); while (*str != 0) { /* non input consuming */ if (*str == *val) { if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); } str++; } return(NULL); } /** * xmlStrcasestr: * @str: the xmlChar * array (haystack) * @val: the xmlChar to search (needle) * * a case-ignoring strstr for xmlChar's * * Returns the xmlChar * for the first occurrence or NULL. */ const xmlChar * xmlStrcasestr(const xmlChar *str, xmlChar *val) { int n; if (str == NULL) return(NULL); if (val == NULL) return(NULL); n = xmlStrlen(val); if (n == 0) return(str); while (*str != 0) { /* non input consuming */ if (casemap[*str] == casemap[*val]) if (!xmlStrncasecmp(str, val, n)) return(str); str++; } return(NULL); } /** * xmlStrsub: * @str: the xmlChar * array (haystack) * @start: the index of the first char (zero based) * @len: the length of the substring * * Extract a substring of a given string * * Returns the xmlChar * for the first occurrence or NULL. */ xmlChar * xmlStrsub(const xmlChar *str, int start, int len) { int i; if (str == NULL) return(NULL); if (start < 0) return(NULL); if (len < 0) return(NULL); for (i = 0;i < start;i++) { if (*str == 0) return(NULL); str++; } if (*str == 0) return(NULL); return(xmlStrndup(str, len)); } /** * xmlStrlen: * @str: the xmlChar * array * * length of a xmlChar's string * * Returns the number of xmlChar contained in the ARRAY. */ int xmlStrlen(const xmlChar *str) { int len = 0; if (str == NULL) return(0); while (*str != 0) { /* non input consuming */ str++; len++; } return(len); } /** * xmlStrncat: * @cur: the original xmlChar * array * @add: the xmlChar * array added * @len: the length of @add * * a strncat for array of xmlChar's, it will extend @cur with the len * first bytes of @add. * * Returns a new xmlChar *, the original @cur is reallocated if needed * and should not be freed */ xmlChar * xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { int size; xmlChar *ret; if ((add == NULL) || (len == 0)) return(cur); if (cur == NULL) return(xmlStrndup(add, len)); size = xmlStrlen(cur); ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); if (ret == NULL) { xmlGenericError(xmlGenericErrorContext, "xmlStrncat: realloc of %ld byte failed\n", (size + len + 1) * (long)sizeof(xmlChar)); return(cur); } memcpy(&ret[size], add, len * sizeof(xmlChar)); ret[size + len] = 0; return(ret); } /** * xmlStrcat: * @cur: the original xmlChar * array * @add: the xmlChar * array added * * a strcat for array of xmlChar's. Since they are supposed to be * encoded in UTF-8 or an encoding with 8bit based chars, we assume * a termination mark of '0'. * * Returns a new xmlChar * containing the concatenated string. */ xmlChar * xmlStrcat(xmlChar *cur, const xmlChar *add) { const xmlChar *p = add; if (add == NULL) return(cur); if (cur == NULL) return(xmlStrdup(add)); while (*p != 0) p++; /* non input consuming */ return(xmlStrncat(cur, add, p - add)); } /************************************************************************ * * * Commodity functions, cleanup needed ? * * * ************************************************************************/ /** * areBlanks: * @ctxt: an XML parser context * @str: a xmlChar * * @len: the size of @str * * Is this a sequence of blank chars that one can ignore ? * * Returns 1 if ignorable 0 otherwise. */ static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { int i, ret; xmlNodePtr lastChild; /* * Don't spend time trying to differentiate them, the same callback is * used ! */ if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) return(0); /* * Check for xml:space value. */ if (*(ctxt->space) == 1) return(0); /* * Check that the string is made of blanks */ for (i = 0;i < len;i++) if (!(IS_BLANK(str[i]))) return(0); /* * Look if the element is mixed content in the DTD if available */ if (ctxt->node == NULL) return(0); if (ctxt->myDoc != NULL) { ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); if (ret == 0) return(1); if (ret == 1) return(0); } /* * Otherwise, heuristic :-\ */ if (RAW != '<') return(0); if ((ctxt->node->children == NULL) && (RAW == '<') && (NXT(1) == '/')) return(0); lastChild = xmlGetLastChild(ctxt->node); if (lastChild == NULL) { if ((ctxt->node->type != XML_ELEMENT_NODE) && (ctxt->node->content != NULL)) return(0); } else if (xmlNodeIsText(lastChild)) return(0); else if ((ctxt->node->children != NULL) && (xmlNodeIsText(ctxt->node->children))) return(0); return(1); } /************************************************************************ * * * Extra stuff for namespace support * * Relates to http://www.w3.org/TR/WD-xml-names * * * ************************************************************************/ /** * xmlSplitQName: * @ctxt: an XML parser context * @name: an XML parser context * @prefix: a xmlChar ** * * parse an UTF8 encoded XML qualified name string * * [NS 5] QName ::= (Prefix ':')? LocalPart * * [NS 6] Prefix ::= NCName * * [NS 7] LocalPart ::= NCName * * Returns the local part, and prefix is updated * to get the Prefix if any. */ xmlChar * xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { xmlChar buf[XML_MAX_NAMELEN + 5]; xmlChar *buffer = NULL; int len = 0; int max = XML_MAX_NAMELEN; xmlChar *ret = NULL; const xmlChar *cur = name; int c; *prefix = NULL; #ifndef XML_XML_NAMESPACE /* xml: prefix is not really a namespace */ if ((cur[0] == 'x') && (cur[1] == 'm') && (cur[2] == 'l') && (cur[3] == ':')) return(xmlStrdup(name)); #endif /* nasty but valid */ if (cur[0] == ':') return(xmlStrdup(name)); c = *cur++; while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ buf[len++] = c; c = *cur++; } if (len >= max) { /* * Okay someone managed to make a huge name, so he's ready to pay * for the processing speed. */ max = len * 2; buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); if (buffer == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlSplitQName: out of memory\n"); return(NULL); } memcpy(buffer, buf, len); while ((c != 0) && (c != ':')) { /* tested bigname.xml */ if (len + 10 > max) { max *= 2; buffer = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); if (buffer == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlSplitQName: out of memory\n"); return(NULL); } } buffer[len++] = c; c = *cur++; } buffer[len] = 0; } if (buffer == NULL) ret = xmlStrndup(buf, len); else { ret = buffer; buffer = NULL; max = XML_MAX_NAMELEN; } if (c == ':') { c = *cur; if (c == 0) return(ret); *prefix = ret; len = 0; /* * Check that the first character is proper to start * a new name */ if (!(((c >= 0x61) && (c <= 0x7A)) || ((c >= 0x41) && (c <= 0x5A)) || (c == '_') || (c == ':'))) { int l; int first = CUR_SCHAR(cur, l); if (!IS_LETTER(first) && (first != '_')) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Name %s is not XML Namespace compliant\n", name); } } cur++; while ((c != 0) && (len < max)) { /* tested bigname2.xml */ buf[len++] = c; c = *cur++; } if (len >= max) { /* * Okay someone managed to make a huge name, so he's ready to pay * for the processing speed. */ max = len * 2; buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); if (buffer == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlSplitQName: out of memory\n"); return(NULL); } memcpy(buffer, buf, len); while (c != 0) { /* tested bigname2.xml */ if (len + 10 > max) { max *= 2; buffer = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); if (buffer == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlSplitQName: out of memory\n"); return(NULL); } } buffer[len++] = c; c = *cur++; } buffer[len] = 0; } if (buffer == NULL) ret = xmlStrndup(buf, len); else { ret = buffer; } } return(ret); } /************************************************************************ * * * The parser itself * * Relates to http://www.w3.org/TR/REC-xml * * * ************************************************************************/ static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); /** * xmlParseName: * @ctxt: an XML parser context * * parse an XML name. * * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | * CombiningChar | Extender * * [5] Name ::= (Letter | '_' | ':') (NameChar)* * * [6] Names ::= Name (S Name)* * * Returns the Name parsed or NULL */ xmlChar * xmlParseName(xmlParserCtxtPtr ctxt) { const xmlChar *in; xmlChar *ret; int count = 0; GROW; /* * Accelerator for simple ASCII names */ in = ctxt->input->cur; if (((*in >= 0x61) && (*in <= 0x7A)) || ((*in >= 0x41) && (*in <= 0x5A)) || (*in == '_') || (*in == ':')) { in++; while (((*in >= 0x61) && (*in <= 0x7A)) || ((*in >= 0x41) && (*in <= 0x5A)) || ((*in >= 0x30) && (*in <= 0x39)) || (*in == '_') || (*in == '-') || (*in == ':') || (*in == '.')) in++; if ((*in > 0) && (*in < 0x80)) { count = in - ctxt->input->cur; ret = xmlStrndup(ctxt->input->cur, count); ctxt->input->cur = in; return(ret); } } return(xmlParseNameComplex(ctxt)); } /** * xmlParseNameAndCompare: * @ctxt: an XML parser context * * parse an XML name and compares for match * (specialized for endtag parsing) * * * Returns NULL for an illegal name, (xmlChar*) 1 for success * and the name for mismatch */ static xmlChar * xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { const xmlChar *cmp = other; const xmlChar *in; xmlChar *ret; GROW; in = ctxt->input->cur; while (*in != 0 && *in == *cmp) { ++in; ++cmp; } if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) { /* success */ ctxt->input->cur = in; return (xmlChar*) 1; } /* failure (or end of input buffer), check with full function */ ret = xmlParseName (ctxt); if (ret != 0 && xmlStrEqual (ret, other)) { xmlFree (ret); return (xmlChar*) 1; } return ret; } static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt) { xmlChar buf[XML_MAX_NAMELEN + 5]; int len = 0, l; int c; int count = 0; /* * Handler for more complex cases */ GROW; c = CUR_CHAR(l); if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ (!IS_LETTER(c) && (c != '_') && (c != ':'))) { return(NULL); } while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c)))) { if (count++ > 100) { count = 0; GROW; } COPY_BUF(l,buf,len,c); NEXTL(l); c = CUR_CHAR(l); if (len >= XML_MAX_NAMELEN) { /* * Okay someone managed to make a huge name, so he's ready to pay * for the processing speed. */ xmlChar *buffer; int max = len * 2; buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); if (buffer == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseNameComplex: out of memory\n"); return(NULL); } memcpy(buffer, buf, len); while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { if (count++ > 100) { count = 0; GROW; } if (len + 10 > max) { max *= 2; buffer = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); if (buffer == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseNameComplex: out of memory\n"); return(NULL); } } COPY_BUF(l,buffer,len,c); NEXTL(l); c = CUR_CHAR(l); } buffer[len] = 0; return(buffer); } } return(xmlStrndup(buf, len)); } /** * xmlParseStringName: * @ctxt: an XML parser context * @str: a pointer to the string pointer (IN/OUT) * * parse an XML name. * * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | * CombiningChar | Extender * * [5] Name ::= (Letter | '_' | ':') (NameChar)* * * [6] Names ::= Name (S Name)* * * Returns the Name parsed or NULL. The @str pointer * is updated to the current location in the string. */ static xmlChar * xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { xmlChar buf[XML_MAX_NAMELEN + 5]; const xmlChar *cur = *str; int len = 0, l; int c; c = CUR_SCHAR(cur, l); if (!IS_LETTER(c) && (c != '_') && (c != ':')) { return(NULL); } while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { COPY_BUF(l,buf,len,c); cur += l; c = CUR_SCHAR(cur, l); if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ /* * Okay someone managed to make a huge name, so he's ready to pay * for the processing speed. */ xmlChar *buffer; int max = len * 2; buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); if (buffer == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseStringName: out of memory\n"); return(NULL); } memcpy(buffer, buf, len); while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { if (len + 10 > max) { max *= 2; buffer = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); if (buffer == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseStringName: out of memory\n"); return(NULL); } } COPY_BUF(l,buffer,len,c); cur += l; c = CUR_SCHAR(cur, l); } buffer[len] = 0; *str = cur; return(buffer); } } *str = cur; return(xmlStrndup(buf, len)); } /** * xmlParseNmtoken: * @ctxt: an XML parser context * * parse an XML Nmtoken. * * [7] Nmtoken ::= (NameChar)+ * * [8] Nmtokens ::= Nmtoken (S Nmtoken)* * * Returns the Nmtoken parsed or NULL */ xmlChar * xmlParseNmtoken(xmlParserCtxtPtr ctxt) { xmlChar buf[XML_MAX_NAMELEN + 5]; int len = 0, l; int c; int count = 0; GROW; c = CUR_CHAR(l); while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { if (count++ > 100) { count = 0; GROW; } COPY_BUF(l,buf,len,c); NEXTL(l); c = CUR_CHAR(l); if (len >= XML_MAX_NAMELEN) { /* * Okay someone managed to make a huge token, so he's ready to pay * for the processing speed. */ xmlChar *buffer; int max = len * 2; buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); if (buffer == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseNmtoken: out of memory\n"); return(NULL); } memcpy(buffer, buf, len); while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { if (count++ > 100) { count = 0; GROW; } if (len + 10 > max) { max *= 2; buffer = (xmlChar *) xmlRealloc(buffer, max * sizeof(xmlChar)); if (buffer == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseNmtoken: out of memory\n"); return(NULL); } } COPY_BUF(l,buffer,len,c); NEXTL(l); c = CUR_CHAR(l); } buffer[len] = 0; return(buffer); } } if (len == 0) return(NULL); return(xmlStrndup(buf, len)); } /** * xmlParseEntityValue: * @ctxt: an XML parser context * @orig: if non-NULL store a copy of the original entity value * * parse a value for ENTITY declarations * * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | * "'" ([^%&'] | PEReference | Reference)* "'" * * Returns the EntityValue parsed with reference substituted or NULL */ xmlChar * xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { xmlChar *buf = NULL; int len = 0; int size = XML_PARSER_BUFFER_SIZE; int c, l; xmlChar stop; xmlChar *ret = NULL; const xmlChar *cur = NULL; xmlParserInputPtr input; if (RAW == '"') stop = '"'; else if (RAW == '\'') stop = '\''; else { ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(NULL); } buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { xmlGenericError(xmlGenericErrorContext, "malloc of %d byte failed\n", size); return(NULL); } /* * The content of the entity definition is copied in a buffer. */ ctxt->instate = XML_PARSER_ENTITY_VALUE; input = ctxt->input; GROW; NEXT; c = CUR_CHAR(l); /* * NOTE: 4.4.5 Included in Literal * When a parameter entity reference appears in a literal entity * value, ... a single or double quote character in the replacement * text is always treated as a normal data character and will not * terminate the literal. * In practice it means we stop the loop only when back at parsing * the initial entity and the quote is found */ while ((IS_CHAR(c)) && ((c != stop) || /* checked */ (ctxt->input != input))) { if (len + 5 >= size) { size *= 2; buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { xmlGenericError(xmlGenericErrorContext, "realloc of %d byte failed\n", size); return(NULL); } } COPY_BUF(l,buf,len,c); NEXTL(l); /* * Pop-up of finished entities. */ while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ xmlPopInput(ctxt); GROW; c = CUR_CHAR(l); if (c == 0) { GROW; c = CUR_CHAR(l); } } buf[len] = 0; /* * Raise problem w.r.t. '&' and '%' being used in non-entities * reference constructs. Note Charref will be handled in * xmlStringDecodeEntities() */ cur = buf; while (*cur != 0) { /* non input consuming */ if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { xmlChar *name; xmlChar tmp = *cur; cur++; name = xmlParseStringName(ctxt, &cur); if ((name == NULL) || (*cur != ';')) { ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "EntityValue: '%c' forbidden except for entities references\n", tmp); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } if ((tmp == '%') && (ctxt->inSubset == 1) && (ctxt->inputNr == 1)) { ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "EntityValue: PEReferences forbidden in internal subset\n", tmp); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } if (name != NULL) xmlFree(name); } cur++; } /* * Then PEReference entities are substituted. */ if (c != stop) { ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; xmlFree(buf); } else { NEXT; /* * NOTE: 4.4.7 Bypassed * When a general entity reference appears in the EntityValue in * an entity declaration, it is bypassed and left as is. * so XML_SUBSTITUTE_REF is not set here. */ ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, 0, 0, 0); if (orig != NULL) *orig = buf; else xmlFree(buf); } return(ret); } /** * xmlParseAttValue: * @ctxt: an XML parser context * * parse a value for an attribute * Note: the parser won't do substitution of entities here, this * will be handled later in xmlStringGetNodeList * * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | * "'" ([^<&'] | Reference)* "'" * * 3.3.3 Attribute-Value Normalization: * Before the value of an attribute is passed to the application or * checked for validity, the XML processor must normalize it as follows: * - a character reference is processed by appending the referenced * character to the attribute value * - an entity reference is processed by recursively processing the * replacement text of the entity * - a whitespace character (#x20, #xD, #xA, #x9) is processed by * appending #x20 to the normalized value, except that only a single * #x20 is appended for a "#xD#xA" sequence that is part of an external * parsed entity or the literal entity value of an internal parsed entity * - other characters are processed by appending them to the normalized value * If the declared value is not CDATA, then the XML processor must further * process the normalized attribute value by discarding any leading and * trailing space (#x20) characters, and by replacing sequences of space * (#x20) characters by a single space (#x20) character. * All attributes for which no declaration has been read should be treated * by a non-validating parser as if declared CDATA. * * Returns the AttValue parsed or NULL. The value has to be freed by the caller. */ xmlChar * xmlParseAttValueComplex(xmlParserCtxtPtr ctxt); xmlChar * xmlParseAttValue(xmlParserCtxtPtr ctxt) { xmlChar limit = 0; const xmlChar *in = NULL; xmlChar *ret = NULL; SHRINK; GROW; in = (xmlChar *) CUR_PTR; if (*in != '"' && *in != '\'') { ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(NULL); } ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; limit = *in; ++in; while (*in != limit && *in >= 0x20 && *in <= 0x7f && *in != '&' && *in != '<' ) { ++in; } if (*in != limit) { return xmlParseAttValueComplex(ctxt); } ++in; ret = xmlStrndup (CUR_PTR + 1, in - CUR_PTR - 2); CUR_PTR = in; return ret; } xmlChar * xmlParseAttValueComplex(xmlParserCtxtPtr ctxt) { xmlChar limit = 0; xmlChar *buf = NULL; int len = 0; int buf_size = 0; int c, l; xmlChar *current = NULL; xmlEntityPtr ent; SHRINK; if (NXT(0) == '"') { ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; limit = '"'; NEXT; } else if (NXT(0) == '\'') { limit = '\''; ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; NEXT; } else { ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(NULL); } /* * allocate a translation buffer. */ buf_size = XML_PARSER_BUFFER_SIZE; buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); if (buf == NULL) { xmlGenericError(xmlGenericErrorContext, "xmlParseAttValue: malloc failed"); return(NULL); } /* * OK loop until we reach one of the ending char or a size limit. */ c = CUR_CHAR(l); while ((NXT(0) != limit) && /* checked */ (c != '<')) { if (c == 0) break; if (c == '&') { if (NXT(1) == '#') { int val = xmlParseCharRef(ctxt); if (val == '&') { if (ctxt->replaceEntities) { if (len > buf_size - 10) { growBuffer(buf); } buf[len++] = '&'; } else { /* * The reparsing will be done in xmlStringGetNodeList() * called by the attribute() function in SAX.c */ static xmlChar buffer[6] = "&"; if (len > buf_size - 10) { growBuffer(buf); } current = &buffer[0]; while (*current != 0) { /* non input consuming */ buf[len++] = *current++; } } } else { if (len > buf_size - 10) { growBuffer(buf); } len += xmlCopyChar(0, &buf[len], val); } } else { ent = xmlParseEntityRef(ctxt); if ((ent != NULL) && (ctxt->replaceEntities != 0)) { xmlChar *rep; if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { rep = xmlStringDecodeEntities(ctxt, ent->content, XML_SUBSTITUTE_REF, 0, 0, 0); if (rep != NULL) { current = rep; while (*current != 0) { /* non input consuming */ buf[len++] = *current++; if (len > buf_size - 10) { growBuffer(buf); } } xmlFree(rep); } } else { if (len > buf_size - 10) { growBuffer(buf); } if (ent->content != NULL) buf[len++] = ent->content[0]; } } else if (ent != NULL) { int i = xmlStrlen(ent->name); const xmlChar *cur = ent->name; /* * This may look absurd but is needed to detect * entities problems */ if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && (ent->content != NULL)) { xmlChar *rep; rep = xmlStringDecodeEntities(ctxt, ent->content, XML_SUBSTITUTE_REF, 0, 0, 0); if (rep != NULL) xmlFree(rep); } /* * Just output the reference */ buf[len++] = '&'; if (len > buf_size - i - 10) { growBuffer(buf); } for (;i > 0;i--) buf[len++] = *cur++; buf[len++] = ';'; } } } else { if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { COPY_BUF(l,buf,len,0x20); if (len > buf_size - 10) { growBuffer(buf); } } else { COPY_BUF(l,buf,len,c); if (len > buf_size - 10) { growBuffer(buf); } } NEXTL(l); } GROW; c = CUR_CHAR(l); } buf[len++] = 0; if (RAW == '<') { ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Unescaped '<' not allowed in attributes values\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else if (RAW != limit) { ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else NEXT; return(buf); } /** * xmlParseSystemLiteral: * @ctxt: an XML parser context * * parse an XML Literal * * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") * * Returns the SystemLiteral parsed or NULL */ xmlChar * xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; int size = XML_PARSER_BUFFER_SIZE; int cur, l; xmlChar stop; int state = ctxt->instate; int count = 0; SHRINK; if (RAW == '"') { NEXT; stop = '"'; } else if (RAW == '\'') { NEXT; stop = '\''; } else { ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(NULL); } buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { xmlGenericError(xmlGenericErrorContext, "malloc of %d byte failed\n", size); return(NULL); } ctxt->instate = XML_PARSER_SYSTEM_LITERAL; cur = CUR_CHAR(l); while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ if (len + 5 >= size) { size *= 2; buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { xmlGenericError(xmlGenericErrorContext, "realloc of %d byte failed\n", size); ctxt->instate = (xmlParserInputState) state; return(NULL); } } count++; if (count > 50) { GROW; count = 0; } COPY_BUF(l,buf,len,cur); NEXTL(l); cur = CUR_CHAR(l); if (cur == 0) { GROW; SHRINK; cur = CUR_CHAR(l); } } buf[len] = 0; ctxt->instate = (xmlParserInputState) state; if (!IS_CHAR(cur)) { ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else { NEXT; } return(buf); } /** * xmlParsePubidLiteral: * @ctxt: an XML parser context * * parse an XML public literal * * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" * * Returns the PubidLiteral parsed or NULL. */ xmlChar * xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; int size = XML_PARSER_BUFFER_SIZE; xmlChar cur; xmlChar stop; int count = 0; xmlParserInputState oldstate = ctxt->instate; SHRINK; if (RAW == '"') { NEXT; stop = '"'; } else if (RAW == '\'') { NEXT; stop = '\''; } else { ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; return(NULL); } buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { xmlGenericError(xmlGenericErrorContext, "malloc of %d byte failed\n", size); return(NULL); } ctxt->instate = XML_PARSER_PUBLIC_LITERAL; cur = CUR; while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ if (len + 1 >= size) { size *= 2; buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { xmlGenericError(xmlGenericErrorContext, "realloc of %d byte failed\n", size); return(NULL); } } buf[len++] = cur; count++; if (count > 50) { GROW; count = 0; } NEXT; cur = CUR; if (cur == 0) { GROW; SHRINK; cur = CUR; } } buf[len] = 0; if (cur != stop) { ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } else { NEXT; } ctxt->instate = oldstate; return(buf); } void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); /** * xmlParseCharData: * @ctxt: an XML parser context * @cdata: int indicating whether we are within a CDATA section * * parse a CharData section. * if we are within a CDATA section ']]>' marks an end of section. * * The right angle bracket (>) may be represented using the string ">", * and must, for compatibility, be escaped using ">" or a character * reference when it appears in the string "]]>" in content, when that * string is not marking the end of a CDATA section. * * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */ void xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { const xmlChar *in; int nbchar = 0; int line = ctxt->input->line; int col = ctxt->input->col; SHRINK; GROW; /* * Accelerated common case where input don't need to be * modified before passing it to the handler. */ if (!cdata) { in = ctxt->input->cur; do { get_more: while (((*in >= 0x20) && (*in != '<') && (*in != ']') && (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) in++; if (*in == 0xA) { ctxt->input->line++; in++; while (*in == 0xA) { ctxt->input->line++; in++; } goto get_more; } if (*in == ']') { if ((in[1] == ']') && (in[2] == '>')) { ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Sequence ']]>' not allowed in content\n"); ctxt->input->cur = in; ctxt->wellFormed = 0; ctxt->disableSAX = 1; return; } in++; goto get_more; } nbchar = in - ctxt->input->cur; if (nbchar > 0) { if (IS_BLANK(*ctxt->input->cur)) { const xmlChar *tmp = ctxt->input->cur; ctxt->input->cur = in; if (areBlanks(ctxt, tmp, nbchar)) { if (ctxt->sax->ignorableWhitespace != NULL) ctxt->sax->ignorableWhitespace(ctxt->userData, tmp, nbchar); } else { if (ctxt->sax->characters != NULL) ctxt->sax->characters(ctxt->userData, tmp, nbchar); } line = ctxt->input->line; col = ctxt->input->col; } else { if (ctxt->sax->characters != NULL) ctxt->sax->characters(ctxt->userData, ctxt->input->cur, nbchar); line = ctxt->input->line; col = ctxt->input->col; } } ctxt->input->cur = in; if (*in == 0xD) { in++; if (*in == 0xA) { ctxt->input->cur = in; in++; ctxt->input->line++; continue; /* while */ } in--; } if (*in == '<') { return; } if (*in == '&') { return; } SHRINK; GROW; in = ctxt->input->cur; } while ((*in >= 0x20) && (*in <= 0x7F)); nbchar = 0; } ctxt->input->line = line; ctxt->input->col = col; xmlParseCharDataComplex(ctxt, cdata); } void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; int nbchar = 0; int cur, l; int count = 0; SHRINK; GROW; cur = CUR_CHAR(l); while ((cur != '<') && /* checked */ (cur != '&') && (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { if (cdata) break; else { ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Sequence ']]>' not allowed in content\n"); /* Should this be relaxed ??? I see a "must here */ ctxt->wellFormed = 0; ctxt->disableSAX = 1; } } COPY_BUF(l,buf,nbchar,cur); if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { /* * OK the segment is to be consumed as chars. */ if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { if (areBlanks(ctxt, buf, nbchar)) { if (ctxt->sax->ignorableWhitespace != NULL) ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); } else { if (ctxt->sax->characters != NULL) ctxt->sax->characters(ctxt->userData, buf, nbchar); } } nbchar = 0; } count++; if (count > 50) { GROW; count = 0; } NEXTL(l); cur = CUR_CHAR(l); } if (nbchar != 0) { /* * OK the segment is to be consumed as chars. */ if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { if (areBlanks(ctxt, buf, nbchar)) { if (ctxt->sax->ignorableWhitespace != NULL) ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); } else { if (ctxt->sax->characters != NULL) ctxt->sax->characters(ctxt->userData, buf, nbchar); } } } } /** * xmlParseExternalID: * @ctxt: an XML parser context * @publicID: a xmlChar** receiving PubidLiteral * @strict: indicate whether we should restrict parsing to only * production [75], see NOTE below * * Parse an External ID or a Public ID * * NOTE: Productions [75] and [83] interact badly since [75] can generate * 'PUBLIC' S PubidLiteral S SystemLiteral * * [75] ExternalID ::= 'SYSTEM' S SystemLiteral * | 'PUBLIC' S PubidLiteral S SystemLiteral * * [83] PublicID ::= 'PUBLIC' S PubidLiteral * * Returns the function returns SystemLiteral and in the second * case publicID receives PubidLiteral, is strict is off * it is possible to return NULL and have publicID set. */ xmlChar * xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { xmlChar *URI = NULL; SHRINK; *publicID = NULL; if ((RAW == 'S') && (NXT(1) == 'Y') && (NXT(2) == 'S') && (NXT(3) == 'T') && (NXT(4) == 'E') && (NXT(5) == 'M')) { SKIP(6); if (!IS_BLANK(CUR)) { ctxt->errNo = XML_ERR_SPACE_REQUIRED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Space required after 'SYSTEM'\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } SKIP_BLANKS; URI = xmlParseSystemLiteral(ctxt); if (URI == NULL) { ctxt->errNo = XML_ERR_URI_REQUIRED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseExternalID: SYSTEM, no URI\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } } else if ((RAW == 'P') && (NXT(1) == 'U') && (NXT(2) == 'B') && (NXT(3) == 'L') && (NXT(4) == 'I') && (NXT(5) == 'C')) { SKIP(6); if (!IS_BLANK(CUR)) { ctxt->errNo = XML_ERR_SPACE_REQUIRED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Space required after 'PUBLIC'\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } SKIP_BLANKS; *publicID = xmlParsePubidLiteral(ctxt); if (*publicID == NULL) { ctxt->errNo = XML_ERR_PUBID_REQUIRED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseExternalID: PUBLIC, no Public Identifier\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } if (strict) { /* * We don't handle [83] so "S SystemLiteral" is required. */ if (!IS_BLANK(CUR)) { ctxt->errNo = XML_ERR_SPACE_REQUIRED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Space required after the Public Identifier\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } } else { /* * We handle [83] so we return immediately, if * "S SystemLiteral" is not detected. From a purely parsing * point of view that's a nice mess. */ const xmlChar *ptr; GROW; ptr = CUR_PTR; if (!IS_BLANK(*ptr)) return(NULL); while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ if ((*ptr != '\'') && (*ptr != '"')) return(NULL); } SKIP_BLANKS; URI = xmlParseSystemLiteral(ctxt); if (URI == NULL) { ctxt->errNo = XML_ERR_URI_REQUIRED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "xmlParseExternalID: PUBLIC, no URI\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } } return(URI); } /** * xmlParseComment: * @ctxt: an XML parser context * * Skip an XML (SGML) comment * The spec says that "For compatibility, the string "--" (double-hyphen) * must not occur within comments. " * * [15] Comment ::= '' */ void xmlParseComment(xmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len; int size = XML_PARSER_BUFFER_SIZE; int q, ql; int r, rl; int cur, l; xmlParserInputState state; xmlParserInputPtr input = ctxt->input; int count = 0; /* * Check that there is a comment right here. */ if ((RAW != '<') || (NXT(1) != '!') || (NXT(2) != '-') || (NXT(3) != '-')) return; state = ctxt->instate; ctxt->instate = XML_PARSER_COMMENT; SHRINK; SKIP(4); buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { xmlGenericError(xmlGenericErrorContext, "malloc of %d byte failed\n", size); ctxt->instate = state; return; } q = CUR_CHAR(ql); NEXTL(ql); r = CUR_CHAR(rl); NEXTL(rl); cur = CUR_CHAR(l); len = 0; while (IS_CHAR(cur) && /* checked */ ((cur != '>') || (r != '-') || (q != '-'))) { if ((r == '-') && (q == '-')) { ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Comment must not contain '--' (double-hyphen)`\n"); ctxt->wellFormed = 0; ctxt->disableSAX = 1; } if (len + 5 >= size) { size *= 2; buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { xmlGenericError(xmlGenericErrorContext, "realloc of %d byte failed\n", size); ctxt->instate = state; return; } } COPY_BUF(ql,buf,len,q); q = r; ql = rl; r = cur; rl = l; count++; if (count > 50) { GROW; count = 0; } NEXTL(l); cur = CUR_CHAR(l); if (cur == 0) { SHRINK; GROW; cur = CUR_CHAR(l); } } buf[len] = 0; if (!IS_CHAR(cur)) { ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Comment not terminated \n