aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author William M. Brack <wbrack@src.gnome.org> 2003-07-29 04:28:04 +0000
committer William M. Brack <wbrack@src.gnome.org> 2003-07-29 04:28:04 +0000
commit 4a557d97bfff5497500a6e707f7892cc4c092153 (patch)
tree 8c202f653cc9e5ca2090a5da761745ba16c55117
parent 9deb242b558cbcff45165866e0634a1962404885 (diff)
download android_external_libxml2-4a557d97bfff5497500a6e707f7892cc4c092153.tar.gz
android_external_libxml2-4a557d97bfff5497500a6e707f7892cc4c092153.tar.bz2
android_external_libxml2-4a557d97bfff5497500a6e707f7892cc4c092153.zip
fixed problem with comments reported by Nick Kew added routines
* HTMLparser.c: fixed problem with comments reported by Nick Kew
* encoding.c: added routines xmlUTF8Size and xmlUTF8Charcmp for some future cleanup of UTF8 handling
-rw-r--r-- ChangeLog 6
-rw-r--r-- HTMLparser.c 5
-rw-r--r-- encoding.c 47
-rw-r--r-- include/libxml/encoding.h 6
4 files changed, 59 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index fe52aad6..ea4e735b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Tue Jul 29 12:28:17 HKT 2003 William Brack <wbrack@mmm.com.hk>
+
+ * HTMLparser.c: fixed problem with comments reported by Nick Kew
+ * encoding.c: added routines xmlUTF8Size and xmlUTF8Charcmp for
+ some future cleanup of UTF8 handling
+
Mon Jul 28 16:39:14 EDT 2003 Daniel Veillard <daniel@veillard.com>
* xpath.c: applied a change suggested by Sean Griffin in bug
diff --git a/HTMLparser.c b/HTMLparser.c
index e7dcb344..2ee458c6 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4358,10 +4358,11 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
(buf[base + 2] == '-') && (buf[base + 3] == '-')) {
incomment = 1;
}
- /* do not increment base, some people use <!--> */
+ /* do not increment past <!, some people use <!--> */
+ base += 2;
}
if (incomment) {
- if (base + 3 < len)
+ if (base + 3 > len)
return(-1);
if ((buf[base] == '-') && (buf[base + 1] == '-') &&
(buf[base + 2] == '>')) {
diff --git a/encoding.c b/encoding.c
index 8d43f45d..5cefd39c 100644
--- a/encoding.c
+++ b/encoding.c
@@ -85,6 +85,53 @@ static int xmlLittleEndian = 1;
************************************************************************/
/**
+ * xmlUTF8Size:
+ * @utf: pointer to the UTF8 character
+ *
+ * returns the number of bytes in the character, -1 on format error
+ */
+int
+xmlUTF8Size(const xmlChar *utf) {
+ xmlChar mask;
+ int len;
+
+ if (utf == NULL)
+ return -1;
+ if (*utf < 0x80)
+ return 1;
+ /* high bit set but 0x40 clear: a 10xxxxxx byte is rejected as a lead byte */
+ if (!(*utf & 0x40))
+ return -1;
+ /* each further 1 bit below 0x40 adds one byte; the first 0 bit ends the count */
+ len = 2;
+ for (mask=0x20; mask != 0; mask>>=1) {
+ if (!(*utf & mask))
+ return len;
+ len++;
+ }
+ return -1; /* only reached for 0xFF, where no 0 bit ends the length prefix */
+}
+
+/**
+ * xmlUTF8Charcmp:
+ * @utf1: pointer to first UTF8 char
+ * @utf2: pointer to second UTF8 char
+ *
+ * returns result of comparing the first xmlUTF8Size(@utf1) bytes of the two
+ * chars with xmlStrncmp; 0 if both are NULL, -1 if only @utf1 is NULL
+ */
+int
+xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
+ /* a NULL @utf1 sorts first; a NULL @utf2 alone is passed on to xmlStrncmp */
+ if (utf1 == NULL ) {
+ if (utf2 == NULL)
+ return 0;
+ return -1;
+ }
+ return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
+}
+
+/**
* xmlUTF8Strlen:
* @utf: a sequence of UTF-8 encoded bytes
*
diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h
index 3c0fbb91..9841e14c 100644
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@@ -208,7 +208,6 @@ int xmlGetUTF8Char (const unsigned char *utf,
*/
int xmlCheckUTF8 (const unsigned char *utf);
-
int xmlUTF8Strsize (const xmlChar *utf,
int len);
xmlChar * xmlUTF8Strndup (const xmlChar *utf,
@@ -220,9 +219,10 @@ int xmlUTF8Strloc (const xmlChar *utf,
xmlChar * xmlUTF8Strsub (const xmlChar *utf,
int start,
int len);
-
int xmlUTF8Strlen (const xmlChar *utf);
-
+int xmlUTF8Size (const xmlChar *utf);
+int xmlUTF8Charcmp (const xmlChar *utf1,
+ const xmlChar *utf2);
#ifdef __cplusplus
}
#endif