aboutsummaryrefslogtreecommitdiffstats
path: root/encoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'encoding.c')
-rw-r--r--encoding.c623
1 files changed, 229 insertions, 394 deletions
diff --git a/encoding.c b/encoding.c
index e49c7f89..de7b511a 100644
--- a/encoding.c
+++ b/encoding.c
@@ -110,6 +110,9 @@ openIcuConverter(const char* name, int toUnicode)
if (conv == NULL)
return NULL;
+ conv->pivot_source = conv->pivot_buf;
+ conv->pivot_target = conv->pivot_buf;
+
conv->uconv = ucnv_open(name, &status);
if (U_FAILURE(status))
goto error;
@@ -354,8 +357,14 @@ UTF8ToUTF8(unsigned char* out, int *outlen,
{
int len;
- if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
+ if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
return(-1);
+ if (inb == NULL) {
+ /* inb == NULL means output is initialized. */
+ *outlen = 0;
+ *inlenb = 0;
+ return(0);
+ }
if (*outlen > *inlenb) {
len = *inlenb;
} else {
@@ -1844,6 +1853,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
* @outlen: the length of @out
* @in: a pointer to an array of ISO Latin 1 chars
* @inlen: the length of @in
+ * @flush: if true, indicates end of input
*
* Returns 0 if success, or
* -1 by lack of space, or
@@ -1857,7 +1867,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
*/
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
- const unsigned char *in, int *inlen) {
+ const unsigned char *in, int *inlen, int flush) {
const char *ucv_in = (const char *) in;
char *ucv_out = (char *) out;
UErrorCode err = U_ZERO_ERROR;
@@ -1867,33 +1877,31 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
return(-1);
}
- /*
- * TODO(jungshik)
- * 1. is ucnv_convert(To|From)Algorithmic better?
- * 2. had we better use an explicit pivot buffer?
- * 3. error returned comes from 'fromUnicode' only even
- * when toUnicode is true !
- */
if (toUnicode) {
/* encoding => UTF-16 => UTF-8 */
ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
- &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
- 0, TRUE, &err);
+ &ucv_in, ucv_in + *inlen, cd->pivot_buf,
+ &cd->pivot_source, &cd->pivot_target,
+ cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
} else {
/* UTF-8 => UTF-16 => encoding */
ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
- &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
- 0, TRUE, &err);
+ &ucv_in, ucv_in + *inlen, cd->pivot_buf,
+ &cd->pivot_source, &cd->pivot_target,
+ cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
}
*inlen = ucv_in - (const char*) in;
*outlen = ucv_out - (char *) out;
- if (U_SUCCESS(err))
+ if (U_SUCCESS(err)) {
+ /* reset pivot buf if this is the last call for input (flush==TRUE) */
+ if (flush)
+ cd->pivot_source = cd->pivot_target = cd->pivot_buf;
return 0;
+ }
if (err == U_BUFFER_OVERFLOW_ERROR)
return -1;
if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
return -2;
- /* if (err == U_TRUNCATED_CHAR_FOUND) */
return -3;
}
#endif /* LIBXML_ICU_ENABLED */
@@ -1904,6 +1912,64 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
* *
************************************************************************/
+static int
+xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
+ int *outlen, const unsigned char *in, int *inlen, int flush) {
+ int ret;
+ (void)flush;
+
+ if (handler->input != NULL) {
+ ret = handler->input(out, outlen, in, inlen);
+ }
+#ifdef LIBXML_ICONV_ENABLED
+ else if (handler->iconv_in != NULL) {
+ ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
+ }
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_in != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
+ flush);
+ }
+#endif /* LIBXML_ICU_ENABLED */
+ else {
+ *outlen = 0;
+ *inlen = 0;
+ ret = -2;
+ }
+
+ return(ret);
+}
+
+/* Returns -4 if no output function was found. */
+static int
+xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
+ int *outlen, const unsigned char *in, int *inlen) {
+ int ret;
+
+ if (handler->output != NULL) {
+ ret = handler->output(out, outlen, in, inlen);
+ }
+#ifdef LIBXML_ICONV_ENABLED
+ else if (handler->iconv_out != NULL) {
+ ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
+ }
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef LIBXML_ICU_ENABLED
+ else if (handler->uconv_out != NULL) {
+ ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
+ TRUE);
+ }
+#endif /* LIBXML_ICU_ENABLED */
+ else {
+ *outlen = 0;
+ *inlen = 0;
+ ret = -4;
+ }
+
+ return(ret);
+}
+
/**
* xmlCharEncFirstLineInt:
* @handler: char enconding transformation data structure
@@ -1922,7 +1988,7 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
int
xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
xmlBufferPtr in, int len) {
- int ret = -2;
+ int ret;
int written;
int toconv;
@@ -1953,33 +2019,13 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
written = out->size - out->use - 1;
}
- if (handler->input != NULL) {
- ret = handler->input(&out->content[out->use], &written,
- in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (handler->iconv_in != NULL) {
- ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- if (ret == -1) ret = -3;
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (handler->uconv_in != NULL) {
- ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- if (ret == -1) ret = -3;
- }
-#endif /* LIBXML_ICU_ENABLED */
+ ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
+ in->content, &toconv, 0);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ out->content[out->use] = 0;
+ if (ret == -1) ret = -3;
+
#ifdef DEBUG_ENCODING
switch (ret) {
case 0:
@@ -2049,7 +2095,7 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
{
- int ret = -2;
+ int ret;
size_t written;
size_t toconv;
int c_in;
@@ -2091,32 +2137,13 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
c_in = toconv;
c_out = written;
- if (input->encoder->input != NULL) {
- ret = input->encoder->input(xmlBufEnd(out), &c_out,
- xmlBufContent(in), &c_in);
- xmlBufShrink(in, c_in);
- xmlBufAddLen(out, c_out);
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (input->encoder->iconv_in != NULL) {
- ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
- &c_out, xmlBufContent(in), &c_in);
- xmlBufShrink(in, c_in);
- xmlBufAddLen(out, c_out);
- if (ret == -1)
- ret = -3;
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (input->encoder->uconv_in != NULL) {
- ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
- &c_out, xmlBufContent(in), &c_in);
- xmlBufShrink(in, c_in);
- xmlBufAddLen(out, c_out);
- if (ret == -1)
- ret = -3;
- }
-#endif /* LIBXML_ICU_ENABLED */
+ ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
+ xmlBufContent(in), &c_in, 0);
+ xmlBufShrink(in, c_in);
+ xmlBufAddLen(out, c_out);
+ if (ret == -1)
+ ret = -3;
+
switch (ret) {
case 0:
#ifdef DEBUG_ENCODING
@@ -2175,7 +2202,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
int
xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
{
- int ret = -2;
+ int ret;
size_t written;
size_t toconv;
int c_in;
@@ -2208,32 +2235,13 @@ xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
c_in = toconv;
c_out = written;
- if (input->encoder->input != NULL) {
- ret = input->encoder->input(xmlBufEnd(out), &c_out,
- xmlBufContent(in), &c_in);
- xmlBufShrink(in, c_in);
- xmlBufAddLen(out, c_out);
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (input->encoder->iconv_in != NULL) {
- ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
- &c_out, xmlBufContent(in), &c_in);
- xmlBufShrink(in, c_in);
- xmlBufAddLen(out, c_out);
- if (ret == -1)
- ret = -3;
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (input->encoder->uconv_in != NULL) {
- ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
- &c_out, xmlBufContent(in), &c_in);
- xmlBufShrink(in, c_in);
- xmlBufAddLen(out, c_out);
- if (ret == -1)
- ret = -3;
- }
-#endif /* LIBXML_ICU_ENABLED */
+ ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
+ xmlBufContent(in), &c_in, flush);
+ xmlBufShrink(in, c_in);
+ xmlBufAddLen(out, c_out);
+ if (ret == -1)
+ ret = -3;
+
switch (ret) {
case 0:
#ifdef DEBUG_ENCODING
@@ -2294,7 +2302,7 @@ int
xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
xmlBufferPtr in)
{
- int ret = -2;
+ int ret;
int written;
int toconv;
@@ -2313,35 +2321,14 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
xmlBufferGrow(out, out->size + toconv * 2);
written = out->size - out->use - 1;
}
- if (handler->input != NULL) {
- ret = handler->input(&out->content[out->use], &written,
- in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (handler->iconv_in != NULL) {
- ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- if (ret == -1)
- ret = -3;
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (handler->uconv_in != NULL) {
- ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- out->content[out->use] = 0;
- if (ret == -1)
- ret = -3;
- }
-#endif /* LIBXML_ICU_ENABLED */
+ ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
+ in->content, &toconv, 1);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ out->content[out->use] = 0;
+ if (ret == -1)
+ ret = -3;
+
switch (ret) {
case 0:
#ifdef DEBUG_ENCODING
@@ -2405,7 +2392,7 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
- int ret = -2;
+ int ret;
size_t written;
size_t writtentot = 0;
size_t toconv;
@@ -2413,7 +2400,6 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init)
int c_out;
xmlBufPtr in;
xmlBufPtr out;
- int charref_len = 0;
if ((output == NULL) || (output->encoder == NULL) ||
(output->buffer == NULL) || (output->conv == NULL))
@@ -2433,26 +2419,10 @@ retry:
if (init) {
c_in = 0;
c_out = written;
- if (output->encoder->output != NULL) {
- ret = output->encoder->output(xmlBufEnd(out), &c_out,
- NULL, &c_in);
- if (ret > 0) /* Gennady: check return value */
- xmlBufAddLen(out, c_out);
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (output->encoder->iconv_out != NULL) {
- ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
- &c_out, NULL, &c_in);
- xmlBufAddLen(out, c_out);
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (output->encoder->uconv_out != NULL) {
- ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
- &c_out, NULL, &c_in);
- xmlBufAddLen(out, c_out);
- }
-#endif /* LIBXML_ICU_ENABLED */
+ /* TODO: Check return value. */
+ xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+ NULL, &c_in);
+ xmlBufAddLen(out, c_out);
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"initialized encoder\n");
@@ -2477,57 +2447,17 @@ retry:
c_in = toconv;
c_out = written;
- if (output->encoder->output != NULL) {
- ret = output->encoder->output(xmlBufEnd(out), &c_out,
- xmlBufContent(in), &c_in);
+ ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+ xmlBufContent(in), &c_in);
+ xmlBufShrink(in, c_in);
+ xmlBufAddLen(out, c_out);
+ writtentot += c_out;
+ if (ret == -1) {
if (c_out > 0) {
- xmlBufShrink(in, c_in);
- xmlBufAddLen(out, c_out);
- writtentot += c_out;
+ /* Can be a limitation of iconv or uconv */
+ goto retry;
}
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (output->encoder->iconv_out != NULL) {
- ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
- &c_out, xmlBufContent(in), &c_in);
- xmlBufShrink(in, c_in);
- xmlBufAddLen(out, c_out);
- writtentot += c_out;
- if (ret == -1) {
- if (c_out > 0) {
- /*
- * Can be a limitation of iconv
- */
- charref_len = 0;
- goto retry;
- }
- ret = -3;
- }
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (output->encoder->uconv_out != NULL) {
- ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
- &c_out, xmlBufContent(in), &c_in);
- xmlBufShrink(in, c_in);
- xmlBufAddLen(out, c_out);
- writtentot += c_out;
- if (ret == -1) {
- if (c_out > 0) {
- /*
- * Can be a limitation of uconv
- */
- charref_len = 0;
- goto retry;
- }
- ret = -3;
- }
- }
-#endif /* LIBXML_ICU_ENABLED */
- else {
- xmlEncodingErr(XML_I18N_NO_OUTPUT,
- "xmlCharEncOutFunc: no output function !\n", NULL);
- return(-1);
+ ret = -3;
}
if (ret >= 0) output += ret;
@@ -2555,47 +2485,44 @@ retry:
c_in, c_out, (int) xmlBufUse(in));
#endif
break;
+ case -4:
+ xmlEncodingErr(XML_I18N_NO_OUTPUT,
+ "xmlCharEncOutFunc: no output function !\n", NULL);
+ ret = -1;
+ break;
case -2: {
+ xmlChar charref[20];
int len = (int) xmlBufUse(in);
xmlChar *content = xmlBufContent(in);
- int cur;
+ int cur, charrefLen;
cur = xmlGetUTF8Char(content, &len);
- if ((charref_len != 0) && (c_out < charref_len)) {
- /*
- * We attempted to insert a character reference and failed.
- * Undo what was written and skip the remaining charref.
- */
- xmlBufErase(out, c_out);
- writtentot -= c_out;
- xmlBufShrink(in, charref_len - c_out);
- charref_len = 0;
-
- ret = -1;
+ if (cur <= 0)
break;
- } else if (cur > 0) {
- xmlChar charref[20];
#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "handling output conversion error\n");
- xmlGenericError(xmlGenericErrorContext,
- "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
- content[0], content[1],
- content[2], content[3]);
+ xmlGenericError(xmlGenericErrorContext,
+ "handling output conversion error\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+ content[0], content[1],
+ content[2], content[3]);
#endif
- /*
- * Removes the UTF8 sequence, and replace it by a charref
- * and continue the transcoding phase, hoping the error
- * did not mangle the encoder state.
- */
- charref_len = snprintf((char *) &charref[0], sizeof(charref),
- "&#%d;", cur);
- xmlBufShrink(in, len);
- xmlBufAddHead(in, charref, -1);
-
- goto retry;
- } else {
+ /*
+ * Removes the UTF8 sequence, and replace it by a charref
+ * and continue the transcoding phase, hoping the error
+ * did not mangle the encoder state.
+ */
+ charrefLen = snprintf((char *) &charref[0], sizeof(charref),
+ "&#%d;", cur);
+ xmlBufShrink(in, len);
+ xmlBufGrow(out, charrefLen * 4);
+ c_out = xmlBufAvail(out) - 1;
+ c_in = charrefLen;
+ ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
+ charref, &c_in);
+
+ if ((ret < 0) || (c_in != charrefLen)) {
char buf[50];
snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
@@ -2607,8 +2534,12 @@ retry:
buf);
if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
content[0] = ' ';
+ break;
}
- break;
+
+ xmlBufAddLen(out, c_out);
+ writtentot += c_out;
+ goto retry;
}
}
return(ret);
@@ -2636,12 +2567,11 @@ retry:
int
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
xmlBufferPtr in) {
- int ret = -2;
+ int ret;
int written;
int writtentot = 0;
int toconv;
int output = 0;
- int charref_len = 0;
if (handler == NULL) return(-1);
if (out == NULL) return(-1);
@@ -2658,31 +2588,11 @@ retry:
*/
if (in == NULL) {
toconv = 0;
- if (handler->output != NULL) {
- ret = handler->output(&out->content[out->use], &written,
- NULL, &toconv);
- if (ret >= 0) { /* Gennady: check return value */
- out->use += written;
- out->content[out->use] = 0;
- }
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (handler->iconv_out != NULL) {
- ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
- &written, NULL, &toconv);
- out->use += written;
- out->content[out->use] = 0;
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (handler->uconv_out != NULL) {
- ret = xmlUconvWrapper(handler->uconv_out, 0,
- &out->content[out->use],
- &written, NULL, &toconv);
- out->use += written;
- out->content[out->use] = 0;
- }
-#endif /* LIBXML_ICU_ENABLED */
+ /* TODO: Check return value. */
+ xmlEncOutputChunk(handler, &out->content[out->use], &written,
+ NULL, &toconv);
+ out->use += written;
+ out->content[out->use] = 0;
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"initialized encoder\n");
@@ -2700,61 +2610,18 @@ retry:
xmlBufferGrow(out, toconv * 4);
written = out->size - out->use - 1;
}
- if (handler->output != NULL) {
- ret = handler->output(&out->content[out->use], &written,
- in->content, &toconv);
- if (written > 0) {
- xmlBufferShrink(in, toconv);
- out->use += written;
- writtentot += written;
- }
- out->content[out->use] = 0;
- }
-#ifdef LIBXML_ICONV_ENABLED
- else if (handler->iconv_out != NULL) {
- ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- writtentot += written;
- out->content[out->use] = 0;
- if (ret == -1) {
- if (written > 0) {
- /*
- * Can be a limitation of iconv
- */
- charref_len = 0;
- goto retry;
- }
- ret = -3;
- }
- }
-#endif /* LIBXML_ICONV_ENABLED */
-#ifdef LIBXML_ICU_ENABLED
- else if (handler->uconv_out != NULL) {
- ret = xmlUconvWrapper(handler->uconv_out, 0,
- &out->content[out->use],
- &written, in->content, &toconv);
- xmlBufferShrink(in, toconv);
- out->use += written;
- writtentot += written;
- out->content[out->use] = 0;
- if (ret == -1) {
- if (written > 0) {
- /*
- * Can be a limitation of iconv
- */
- charref_len = 0;
- goto retry;
- }
- ret = -3;
- }
- }
-#endif /* LIBXML_ICU_ENABLED */
- else {
- xmlEncodingErr(XML_I18N_NO_OUTPUT,
- "xmlCharEncOutFunc: no output function !\n", NULL);
- return(-1);
+ ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
+ in->content, &toconv);
+ xmlBufferShrink(in, toconv);
+ out->use += written;
+ writtentot += written;
+ out->content[out->use] = 0;
+ if (ret == -1) {
+ if (written > 0) {
+ /* Can be a limitation of iconv or uconv */
+ goto retry;
+ }
+ ret = -3;
}
if (ret >= 0) output += ret;
@@ -2782,47 +2649,44 @@ retry:
toconv, written, in->use);
#endif
break;
+ case -4:
+ xmlEncodingErr(XML_I18N_NO_OUTPUT,
+ "xmlCharEncOutFunc: no output function !\n", NULL);
+ ret = -1;
+ break;
case -2: {
+ xmlChar charref[20];
int len = in->use;
const xmlChar *utf = (const xmlChar *) in->content;
- int cur;
+ int cur, charrefLen;
cur = xmlGetUTF8Char(utf, &len);
- if ((charref_len != 0) && (written < charref_len)) {
- /*
- * We attempted to insert a character reference and failed.
- * Undo what was written and skip the remaining charref.
- */
- out->use -= written;
- writtentot -= written;
- xmlBufferShrink(in, charref_len - written);
- charref_len = 0;
-
- ret = -1;
+ if (cur <= 0)
break;
- } else if (cur > 0) {
- xmlChar charref[20];
#ifdef DEBUG_ENCODING
- xmlGenericError(xmlGenericErrorContext,
- "handling output conversion error\n");
- xmlGenericError(xmlGenericErrorContext,
- "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
- in->content[0], in->content[1],
- in->content[2], in->content[3]);
+ xmlGenericError(xmlGenericErrorContext,
+ "handling output conversion error\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+ in->content[0], in->content[1],
+ in->content[2], in->content[3]);
#endif
- /*
- * Removes the UTF8 sequence, and replace it by a charref
- * and continue the transcoding phase, hoping the error
- * did not mangle the encoder state.
- */
- charref_len = snprintf((char *) &charref[0], sizeof(charref),
- "&#%d;", cur);
- xmlBufferShrink(in, len);
- xmlBufferAddHead(in, charref, -1);
-
- goto retry;
- } else {
+ /*
+ * Removes the UTF8 sequence, and replace it by a charref
+ * and continue the transcoding phase, hoping the error
+ * did not mangle the encoder state.
+ */
+ charrefLen = snprintf((char *) &charref[0], sizeof(charref),
+ "&#%d;", cur);
+ xmlBufferShrink(in, len);
+ xmlBufferGrow(out, charrefLen * 4);
+ written = out->size - out->use - 1;
+ toconv = charrefLen;
+ ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
+ charref, &toconv);
+
+ if ((ret < 0) || (toconv != charrefLen)) {
char buf[50];
snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
@@ -2834,8 +2698,13 @@ retry:
buf);
if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
in->content[0] = ' ';
+ break;
}
- break;
+
+ out->use += written;
+ writtentot += written;
+ out->content[out->use] = 0;
+ goto retry;
}
}
return(ret);
@@ -2954,54 +2823,20 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
int ret;
- if (handler->output != NULL) {
- do {
- toconv = in->end - cur;
- written = 32000;
- ret = handler->output(&convbuf[0], &written,
- cur, &toconv);
- if (ret == -1) return(-1);
- unused += written;
- cur += toconv;
- } while (ret == -2);
-#ifdef LIBXML_ICONV_ENABLED
- } else if (handler->iconv_out != NULL) {
- do {
- toconv = in->end - cur;
- written = 32000;
- ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
- &written, cur, &toconv);
- if (ret < 0) {
- if (written > 0)
- ret = -2;
- else
- return(-1);
- }
- unused += written;
- cur += toconv;
- } while (ret == -2);
-#endif
-#ifdef LIBXML_ICU_ENABLED
- } else if (handler->uconv_out != NULL) {
- do {
- toconv = in->end - cur;
- written = 32000;
- ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
- &written, cur, &toconv);
- if (ret < 0) {
- if (written > 0)
- ret = -2;
- else
- return(-1);
- }
- unused += written;
- cur += toconv;
- } while (ret == -2);
-#endif
- } else {
- /* could not find a converter */
- return(-1);
- }
+ do {
+ toconv = in->end - cur;
+ written = 32000;
+ ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
+ cur, &toconv);
+ if (ret < 0) {
+ if (written > 0)
+ ret = -2;
+ else
+ return(-1);
+ }
+ unused += written;
+ cur += toconv;
+ } while (ret == -2);
}
if (in->buf->rawconsumed < unused)
return(-1);