diff options
Diffstat (limited to 'encoding.c')
-rw-r--r-- | encoding.c | 623 |
1 files changed, 229 insertions, 394 deletions
@@ -110,6 +110,9 @@ openIcuConverter(const char* name, int toUnicode) if (conv == NULL) return NULL; + conv->pivot_source = conv->pivot_buf; + conv->pivot_target = conv->pivot_buf; + conv->uconv = ucnv_open(name, &status); if (U_FAILURE(status)) goto error; @@ -354,8 +357,14 @@ UTF8ToUTF8(unsigned char* out, int *outlen, { int len; - if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL)) + if ((out == NULL) || (outlen == NULL) || (inlenb == NULL)) return(-1); + if (inb == NULL) { + /* inb == NULL means output is initialized. */ + *outlen = 0; + *inlenb = 0; + return(0); + } if (*outlen > *inlenb) { len = *inlenb; } else { @@ -1844,6 +1853,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, * @outlen: the length of @out * @in: a pointer to an array of ISO Latin 1 chars * @inlen: the length of @in + * @flush: if true, indicates end of input * * Returns 0 if success, or * -1 by lack of space, or @@ -1857,7 +1867,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, */ static int xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, - const unsigned char *in, int *inlen) { + const unsigned char *in, int *inlen, int flush) { const char *ucv_in = (const char *) in; char *ucv_out = (char *) out; UErrorCode err = U_ZERO_ERROR; @@ -1867,33 +1877,31 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, return(-1); } - /* - * TODO(jungshik) - * 1. is ucnv_convert(To|From)Algorithmic better? - * 2. had we better use an explicit pivot buffer? - * 3. error returned comes from 'fromUnicode' only even - * when toUnicode is true ! - */ if (toUnicode) { /* encoding => UTF-16 => UTF-8 */ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, - &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, - 0, TRUE, &err); + &ucv_in, ucv_in + *inlen, cd->pivot_buf, + &cd->pivot_source, &cd->pivot_target, + cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err); } else { /* UTF-8 => UTF-16 => encoding */ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, - &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, - 0, TRUE, &err); + &ucv_in, ucv_in + *inlen, cd->pivot_buf, + &cd->pivot_source, &cd->pivot_target, + cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err); } *inlen = ucv_in - (const char*) in; *outlen = ucv_out - (char *) out; - if (U_SUCCESS(err)) + if (U_SUCCESS(err)) { + /* reset pivot buf if this is the last call for input (flush==TRUE) */ + if (flush) + cd->pivot_source = cd->pivot_target = cd->pivot_buf; return 0; + } if (err == U_BUFFER_OVERFLOW_ERROR) return -1; if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) return -2; - /* if (err == U_TRUNCATED_CHAR_FOUND) */ return -3; } #endif /* LIBXML_ICU_ENABLED */ @@ -1904,6 +1912,64 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, * * ************************************************************************/ +static int +xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, + int *outlen, const unsigned char *in, int *inlen, int flush) { + int ret; + (void)flush; + + if (handler->input != NULL) { + ret = handler->input(out, outlen, in, inlen); + } +#ifdef LIBXML_ICONV_ENABLED + else if (handler->iconv_in != NULL) { + ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen); + } +#endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + else if (handler->uconv_in != NULL) { + ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen, + flush); + } +#endif /* LIBXML_ICU_ENABLED */ + else { + *outlen = 0; + *inlen = 0; + ret = -2; + } + + return(ret); +} + +/* Returns -4 if no output function was found. */ +static int +xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, + int *outlen, const unsigned char *in, int *inlen) { + int ret; + + if (handler->output != NULL) { + ret = handler->output(out, outlen, in, inlen); + } +#ifdef LIBXML_ICONV_ENABLED + else if (handler->iconv_out != NULL) { + ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen); + } +#endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + else if (handler->uconv_out != NULL) { + ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen, + TRUE); + } +#endif /* LIBXML_ICU_ENABLED */ + else { + *outlen = 0; + *inlen = 0; + ret = -4; + } + + return(ret); +} + /** * xmlCharEncFirstLineInt: * @handler: char enconding transformation data structure @@ -1922,7 +1988,7 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, int xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in, int len) { - int ret = -2; + int ret; int written; int toconv; @@ -1953,33 +2019,13 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, written = out->size - out->use - 1; } - if (handler->input != NULL) { - ret = handler->input(&out->content[out->use], &written, - in->content, &toconv); - xmlBufferShrink(in, toconv); - out->use += written; - out->content[out->use] = 0; - } -#ifdef LIBXML_ICONV_ENABLED - else if (handler->iconv_in != NULL) { - ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], - &written, in->content, &toconv); - xmlBufferShrink(in, toconv); - out->use += written; - out->content[out->use] = 0; - if (ret == -1) ret = -3; - } -#endif /* LIBXML_ICONV_ENABLED */ -#ifdef LIBXML_ICU_ENABLED - else if (handler->uconv_in != NULL) { - ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], - &written, in->content, &toconv); - xmlBufferShrink(in, toconv); - out->use += written; - out->content[out->use] = 0; - if (ret == -1) ret = -3; - } -#endif /* LIBXML_ICU_ENABLED */ + ret = xmlEncInputChunk(handler, &out->content[out->use], &written, + in->content, &toconv, 0); + xmlBufferShrink(in, toconv); + out->use += written; + out->content[out->use] = 0; + if (ret == -1) ret = -3; + #ifdef DEBUG_ENCODING switch (ret) { case 0: @@ -2049,7 +2095,7 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, int xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) { - int ret = -2; + int ret; size_t written; size_t toconv; int c_in; @@ -2091,32 +2137,13 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) c_in = toconv; c_out = written; - if (input->encoder->input != NULL) { - ret = input->encoder->input(xmlBufEnd(out), &c_out, - xmlBufContent(in), &c_in); - xmlBufShrink(in, c_in); - xmlBufAddLen(out, c_out); - } -#ifdef LIBXML_ICONV_ENABLED - else if (input->encoder->iconv_in != NULL) { - ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), - &c_out, xmlBufContent(in), &c_in); - xmlBufShrink(in, c_in); - xmlBufAddLen(out, c_out); - if (ret == -1) - ret = -3; - } -#endif /* LIBXML_ICONV_ENABLED */ -#ifdef LIBXML_ICU_ENABLED - else if (input->encoder->uconv_in != NULL) { - ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), - &c_out, xmlBufContent(in), &c_in); - xmlBufShrink(in, c_in); - xmlBufAddLen(out, c_out); - if (ret == -1) - ret = -3; - } -#endif /* LIBXML_ICU_ENABLED */ + ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out, + xmlBufContent(in), &c_in, 0); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + if (ret == -1) + ret = -3; + switch (ret) { case 0: #ifdef DEBUG_ENCODING @@ -2175,7 +2202,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) int xmlCharEncInput(xmlParserInputBufferPtr input, int flush) { - int ret = -2; + int ret; size_t written; size_t toconv; int c_in; @@ -2208,32 +2235,13 @@ xmlCharEncInput(xmlParserInputBufferPtr input, int flush) c_in = toconv; c_out = written; - if (input->encoder->input != NULL) { - ret = input->encoder->input(xmlBufEnd(out), &c_out, - xmlBufContent(in), &c_in); - xmlBufShrink(in, c_in); - xmlBufAddLen(out, c_out); - } -#ifdef LIBXML_ICONV_ENABLED - else if (input->encoder->iconv_in != NULL) { - ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), - &c_out, xmlBufContent(in), &c_in); - xmlBufShrink(in, c_in); - xmlBufAddLen(out, c_out); - if (ret == -1) - ret = -3; - } -#endif /* LIBXML_ICONV_ENABLED */ -#ifdef LIBXML_ICU_ENABLED - else if (input->encoder->uconv_in != NULL) { - ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), - &c_out, xmlBufContent(in), &c_in); - xmlBufShrink(in, c_in); - xmlBufAddLen(out, c_out); - if (ret == -1) - ret = -3; - } -#endif /* LIBXML_ICU_ENABLED */ + ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out, + xmlBufContent(in), &c_in, flush); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + if (ret == -1) + ret = -3; + switch (ret) { case 0: #ifdef DEBUG_ENCODING @@ -2294,7 +2302,7 @@ int xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, xmlBufferPtr in) { - int ret = -2; + int ret; int written; int toconv; @@ -2313,35 +2321,14 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, xmlBufferGrow(out, out->size + toconv * 2); written = out->size - out->use - 1; } - if (handler->input != NULL) { - ret = handler->input(&out->content[out->use], &written, - in->content, &toconv); - xmlBufferShrink(in, toconv); - out->use += written; - out->content[out->use] = 0; - } -#ifdef LIBXML_ICONV_ENABLED - else if (handler->iconv_in != NULL) { - ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], - &written, in->content, &toconv); - xmlBufferShrink(in, toconv); - out->use += written; - out->content[out->use] = 0; - if (ret == -1) - ret = -3; - } -#endif /* LIBXML_ICONV_ENABLED */ -#ifdef LIBXML_ICU_ENABLED - else if (handler->uconv_in != NULL) { - ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], - &written, in->content, &toconv); - xmlBufferShrink(in, toconv); - out->use += written; - out->content[out->use] = 0; - if (ret == -1) - ret = -3; - } -#endif /* LIBXML_ICU_ENABLED */ + ret = xmlEncInputChunk(handler, &out->content[out->use], &written, + in->content, &toconv, 1); + xmlBufferShrink(in, toconv); + out->use += written; + out->content[out->use] = 0; + if (ret == -1) + ret = -3; + switch (ret) { case 0: #ifdef DEBUG_ENCODING @@ -2405,7 +2392,7 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, int xmlCharEncOutput(xmlOutputBufferPtr output, int init) { - int ret = -2; + int ret; size_t written; size_t writtentot = 0; size_t toconv; @@ -2413,7 +2400,6 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init) int c_out; xmlBufPtr in; xmlBufPtr out; - int charref_len = 0; if ((output == NULL) || (output->encoder == NULL) || (output->buffer == NULL) || (output->conv == NULL)) @@ -2433,26 +2419,10 @@ retry: if (init) { c_in = 0; c_out = written; - if (output->encoder->output != NULL) { - ret = output->encoder->output(xmlBufEnd(out), &c_out, - NULL, &c_in); - if (ret > 0) /* Gennady: check return value */ - xmlBufAddLen(out, c_out); - } -#ifdef LIBXML_ICONV_ENABLED - else if (output->encoder->iconv_out != NULL) { - ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), - &c_out, NULL, &c_in); - xmlBufAddLen(out, c_out); - } -#endif /* LIBXML_ICONV_ENABLED */ -#ifdef LIBXML_ICU_ENABLED - else if (output->encoder->uconv_out != NULL) { - ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), - &c_out, NULL, &c_in); - xmlBufAddLen(out, c_out); - } -#endif /* LIBXML_ICU_ENABLED */ + /* TODO: Check return value. */ + xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, + NULL, &c_in); + xmlBufAddLen(out, c_out); #ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "initialized encoder\n"); @@ -2477,57 +2447,17 @@ retry: c_in = toconv; c_out = written; - if (output->encoder->output != NULL) { - ret = output->encoder->output(xmlBufEnd(out), &c_out, - xmlBufContent(in), &c_in); + ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, + xmlBufContent(in), &c_in); + xmlBufShrink(in, c_in); + xmlBufAddLen(out, c_out); + writtentot += c_out; + if (ret == -1) { if (c_out > 0) { - xmlBufShrink(in, c_in); - xmlBufAddLen(out, c_out); - writtentot += c_out; + /* Can be a limitation of iconv or uconv */ + goto retry; } - } -#ifdef LIBXML_ICONV_ENABLED - else if (output->encoder->iconv_out != NULL) { - ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), - &c_out, xmlBufContent(in), &c_in); - xmlBufShrink(in, c_in); - xmlBufAddLen(out, c_out); - writtentot += c_out; - if (ret == -1) { - if (c_out > 0) { - /* - * Can be a limitation of iconv - */ - charref_len = 0; - goto retry; - } - ret = -3; - } - } -#endif /* LIBXML_ICONV_ENABLED */ -#ifdef LIBXML_ICU_ENABLED - else if (output->encoder->uconv_out != NULL) { - ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), - &c_out, xmlBufContent(in), &c_in); - xmlBufShrink(in, c_in); - xmlBufAddLen(out, c_out); - writtentot += c_out; - if (ret == -1) { - if (c_out > 0) { - /* - * Can be a limitation of uconv - */ - charref_len = 0; - goto retry; - } - ret = -3; - } - } -#endif /* LIBXML_ICU_ENABLED */ - else { - xmlEncodingErr(XML_I18N_NO_OUTPUT, - "xmlCharEncOutFunc: no output function !\n", NULL); - return(-1); + ret = -3; } if (ret >= 0) output += ret; @@ -2555,47 +2485,44 @@ retry: c_in, c_out, (int) xmlBufUse(in)); #endif break; + case -4: + xmlEncodingErr(XML_I18N_NO_OUTPUT, + "xmlCharEncOutFunc: no output function !\n", NULL); + ret = -1; + break; case -2: { + xmlChar charref[20]; int len = (int) xmlBufUse(in); xmlChar *content = xmlBufContent(in); - int cur; + int cur, charrefLen; cur = xmlGetUTF8Char(content, &len); - if ((charref_len != 0) && (c_out < charref_len)) { - /* - * We attempted to insert a character reference and failed. - * Undo what was written and skip the remaining charref. - */ - xmlBufErase(out, c_out); - writtentot -= c_out; - xmlBufShrink(in, charref_len - c_out); - charref_len = 0; - - ret = -1; + if (cur <= 0) break; - } else if (cur > 0) { - xmlChar charref[20]; #ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "handling output conversion error\n"); - xmlGenericError(xmlGenericErrorContext, - "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - content[0], content[1], - content[2], content[3]); + xmlGenericError(xmlGenericErrorContext, + "handling output conversion error\n"); + xmlGenericError(xmlGenericErrorContext, + "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + content[0], content[1], + content[2], content[3]); #endif - /* - * Removes the UTF8 sequence, and replace it by a charref - * and continue the transcoding phase, hoping the error - * did not mangle the encoder state. - */ - charref_len = snprintf((char *) &charref[0], sizeof(charref), - "&#%d;", cur); - xmlBufShrink(in, len); - xmlBufAddHead(in, charref, -1); - - goto retry; - } else { + /* + * Removes the UTF8 sequence, and replace it by a charref + * and continue the transcoding phase, hoping the error + * did not mangle the encoder state. + */ + charrefLen = snprintf((char *) &charref[0], sizeof(charref), + "&#%d;", cur); + xmlBufShrink(in, len); + xmlBufGrow(out, charrefLen * 4); + c_out = xmlBufAvail(out) - 1; + c_in = charrefLen; + ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out, + charref, &c_in); + + if ((ret < 0) || (c_in != charrefLen)) { char buf[50]; snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", @@ -2607,8 +2534,12 @@ retry: buf); if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE) content[0] = ' '; + break; } - break; + + xmlBufAddLen(out, c_out); + writtentot += c_out; + goto retry; } } return(ret); @@ -2636,12 +2567,11 @@ retry: int xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, xmlBufferPtr in) { - int ret = -2; + int ret; int written; int writtentot = 0; int toconv; int output = 0; - int charref_len = 0; if (handler == NULL) return(-1); if (out == NULL) return(-1); @@ -2658,31 +2588,11 @@ retry: */ if (in == NULL) { toconv = 0; - if (handler->output != NULL) { - ret = handler->output(&out->content[out->use], &written, - NULL, &toconv); - if (ret >= 0) { /* Gennady: check return value */ - out->use += written; - out->content[out->use] = 0; - } - } -#ifdef LIBXML_ICONV_ENABLED - else if (handler->iconv_out != NULL) { - ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], - &written, NULL, &toconv); - out->use += written; - out->content[out->use] = 0; - } -#endif /* LIBXML_ICONV_ENABLED */ -#ifdef LIBXML_ICU_ENABLED - else if (handler->uconv_out != NULL) { - ret = xmlUconvWrapper(handler->uconv_out, 0, - &out->content[out->use], - &written, NULL, &toconv); - out->use += written; - out->content[out->use] = 0; - } -#endif /* LIBXML_ICU_ENABLED */ + /* TODO: Check return value. */ + xmlEncOutputChunk(handler, &out->content[out->use], &written, + NULL, &toconv); + out->use += written; + out->content[out->use] = 0; #ifdef DEBUG_ENCODING xmlGenericError(xmlGenericErrorContext, "initialized encoder\n"); @@ -2700,61 +2610,18 @@ retry: xmlBufferGrow(out, toconv * 4); written = out->size - out->use - 1; } - if (handler->output != NULL) { - ret = handler->output(&out->content[out->use], &written, - in->content, &toconv); - if (written > 0) { - xmlBufferShrink(in, toconv); - out->use += written; - writtentot += written; - } - out->content[out->use] = 0; - } -#ifdef LIBXML_ICONV_ENABLED - else if (handler->iconv_out != NULL) { - ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], - &written, in->content, &toconv); - xmlBufferShrink(in, toconv); - out->use += written; - writtentot += written; - out->content[out->use] = 0; - if (ret == -1) { - if (written > 0) { - /* - * Can be a limitation of iconv - */ - charref_len = 0; - goto retry; - } - ret = -3; - } - } -#endif /* LIBXML_ICONV_ENABLED */ -#ifdef LIBXML_ICU_ENABLED - else if (handler->uconv_out != NULL) { - ret = xmlUconvWrapper(handler->uconv_out, 0, - &out->content[out->use], - &written, in->content, &toconv); - xmlBufferShrink(in, toconv); - out->use += written; - writtentot += written; - out->content[out->use] = 0; - if (ret == -1) { - if (written > 0) { - /* - * Can be a limitation of iconv - */ - charref_len = 0; - goto retry; - } - ret = -3; - } - } -#endif /* LIBXML_ICU_ENABLED */ - else { - xmlEncodingErr(XML_I18N_NO_OUTPUT, - "xmlCharEncOutFunc: no output function !\n", NULL); - return(-1); + ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, + in->content, &toconv); + xmlBufferShrink(in, toconv); + out->use += written; + writtentot += written; + out->content[out->use] = 0; + if (ret == -1) { + if (written > 0) { + /* Can be a limitation of iconv or uconv */ + goto retry; + } + ret = -3; } if (ret >= 0) output += ret; @@ -2782,47 +2649,44 @@ retry: toconv, written, in->use); #endif break; + case -4: + xmlEncodingErr(XML_I18N_NO_OUTPUT, + "xmlCharEncOutFunc: no output function !\n", NULL); + ret = -1; + break; case -2: { + xmlChar charref[20]; int len = in->use; const xmlChar *utf = (const xmlChar *) in->content; - int cur; + int cur, charrefLen; cur = xmlGetUTF8Char(utf, &len); - if ((charref_len != 0) && (written < charref_len)) { - /* - * We attempted to insert a character reference and failed. - * Undo what was written and skip the remaining charref. - */ - out->use -= written; - writtentot -= written; - xmlBufferShrink(in, charref_len - written); - charref_len = 0; - - ret = -1; + if (cur <= 0) break; - } else if (cur > 0) { - xmlChar charref[20]; #ifdef DEBUG_ENCODING - xmlGenericError(xmlGenericErrorContext, - "handling output conversion error\n"); - xmlGenericError(xmlGenericErrorContext, - "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", - in->content[0], in->content[1], - in->content[2], in->content[3]); + xmlGenericError(xmlGenericErrorContext, + "handling output conversion error\n"); + xmlGenericError(xmlGenericErrorContext, + "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", + in->content[0], in->content[1], + in->content[2], in->content[3]); #endif - /* - * Removes the UTF8 sequence, and replace it by a charref - * and continue the transcoding phase, hoping the error - * did not mangle the encoder state. - */ - charref_len = snprintf((char *) &charref[0], sizeof(charref), - "&#%d;", cur); - xmlBufferShrink(in, len); - xmlBufferAddHead(in, charref, -1); - - goto retry; - } else { + /* + * Removes the UTF8 sequence, and replace it by a charref + * and continue the transcoding phase, hoping the error + * did not mangle the encoder state. + */ + charrefLen = snprintf((char *) &charref[0], sizeof(charref), + "&#%d;", cur); + xmlBufferShrink(in, len); + xmlBufferGrow(out, charrefLen * 4); + written = out->size - out->use - 1; + toconv = charrefLen; + ret = xmlEncOutputChunk(handler, &out->content[out->use], &written, + charref, &toconv); + + if ((ret < 0) || (toconv != charrefLen)) { char buf[50]; snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", @@ -2834,8 +2698,13 @@ retry: buf); if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) in->content[0] = ' '; + break; } - break; + + out->use += written; + writtentot += written; + out->content[out->use] = 0; + goto retry; } } return(ret); @@ -2954,54 +2823,20 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { int ret; - if (handler->output != NULL) { - do { - toconv = in->end - cur; - written = 32000; - ret = handler->output(&convbuf[0], &written, - cur, &toconv); - if (ret == -1) return(-1); - unused += written; - cur += toconv; - } while (ret == -2); -#ifdef LIBXML_ICONV_ENABLED - } else if (handler->iconv_out != NULL) { - do { - toconv = in->end - cur; - written = 32000; - ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0], - &written, cur, &toconv); - if (ret < 0) { - if (written > 0) - ret = -2; - else - return(-1); - } - unused += written; - cur += toconv; - } while (ret == -2); -#endif -#ifdef LIBXML_ICU_ENABLED - } else if (handler->uconv_out != NULL) { - do { - toconv = in->end - cur; - written = 32000; - ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], - &written, cur, &toconv); - if (ret < 0) { - if (written > 0) - ret = -2; - else - return(-1); - } - unused += written; - cur += toconv; - } while (ret == -2); -#endif - } else { - /* could not find a converter */ - return(-1); - } + do { + toconv = in->end - cur; + written = 32000; + ret = xmlEncOutputChunk(handler, &convbuf[0], &written, + cur, &toconv); + if (ret < 0) { + if (written > 0) + ret = -2; + else + return(-1); + } + unused += written; + cur += toconv; + } while (ret == -2); } if (in->buf->rawconsumed < unused) return(-1); |