aboutsummaryrefslogtreecommitdiffstats
path: root/encoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'encoding.c')
-rw-r--r--encoding.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/encoding.c b/encoding.c
index 8d43f45d..5cefd39c 100644
--- a/encoding.c
+++ b/encoding.c
@@ -85,6 +85,53 @@ static int xmlLittleEndian = 1;
************************************************************************/
/**
+ * xmlUTF8Size:
+ * @utf: pointer to the first byte of a UTF-8 encoded character
+ *
+ * Computes the encoded length of a character from its lead byte alone;
+ * the continuation bytes that follow are not inspected.
+ *
+ * returns the number of bytes in the character (1 to 4), or -1 if @utf
+ * is NULL or the lead byte cannot start a UTF-8 sequence
+ */
+int
+xmlUTF8Size(const xmlChar *utf) {
+    xmlChar lead;
+
+    if (utf == NULL)
+        return -1;
+    lead = *utf;
+    /* 0xxxxxxx: plain ASCII, single byte */
+    if (lead < 0x80)
+        return 1;
+    /* 10xxxxxx is a continuation byte and cannot start a character */
+    if (!(lead & 0x40))
+        return -1;
+    /* 110xxxxx, 1110xxxx and 11110xxx introduce 2, 3 and 4 byte forms */
+    if (!(lead & 0x20))
+        return 2;
+    if (!(lead & 0x10))
+        return 3;
+    if (!(lead & 0x08))
+        return 4;
+    /*
+     * 0xF8..0xFF: no UTF-8 sequence is longer than 4 bytes, so these
+     * lead bytes are format errors rather than 5, 6 or 7 byte characters.
+     */
+    return -1;
+}
+
+/**
+ * xmlUTF8Charcmp:
+ * @utf1: pointer to first UTF8 char
+ * @utf2: pointer to second UTF8 char
+ *
+ * Compares the character at @utf1 with the one at @utf2 over the encoded
+ * length of @utf1, as reported by xmlUTF8Size().
+ *
+ * returns 0 if both pointers are NULL, -1 if only @utf1 is NULL,
+ * otherwise the result of xmlStrncmp on the two characters
+ */
+int
+xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
+
+    if (utf1 == NULL) {
+        if (utf2 == NULL)
+            return 0;
+        return -1;
+    }
+    /*
+     * NOTE(review): a -1 from xmlUTF8Size (malformed lead byte) and a NULL
+     * @utf2 are both handed straight to xmlStrncmp -- confirm it copes
+     * with a non-positive length and a NULL second argument.
+     */
+    return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
+}
+
+/**
* xmlUTF8Strlen:
* @utf: a sequence of UTF-8 encoded bytes
*