Add a new utf8-to-utf16 conversion function.

Change-Id: I957c22fb219596ca4239db7a169473d3894b09eb
author: Dianne Hackborn <hackbod@google.com> 2013-07-31 16:04:39 -0700
committer: Dianne Hackborn <hackbod@google.com> 2013-07-31 16:04:39 -0700
commit: 0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0 (patch)
tree: 73f787e480f5c56f78652b3455ad6f40664e8d7b
parent: d43eae5e399a1e6464def8227083988c0a090dee (diff)
download: system_core-0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0.tar.gz
system_core-0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0.tar.bz2
system_core-0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0.zip
2 files changed, 37 insertions, 0 deletions
diff --git a/include/utils/Unicode.h b/include/utils/Unicode.h
index 927353377..c8c87c326 100644
--- a/include/utils/Unicode.h
+++ b/include/utils/Unicode.h
@@ -163,6 +163,13 @@ char16_t* utf8_to_utf16_no_null_terminator(const uint8_t* src, size_t srcLen, ch
  */
 void utf8_to_utf16(const uint8_t* src, size_t srcLen, char16_t* dst);
 
+/**
+ * Like utf8_to_utf16_no_null_terminator, but writes at most dstLen UTF-16 units to dst.
+ * Returns a pointer just past the last unit of the decoded output, never beyond
+ * dst + dstLen; a surrogate pair that would not fully fit is excluded from that range.
+ */
+char16_t* utf8_to_utf16_n(const uint8_t* src, size_t srcLen, char16_t* dst, size_t dstLen);
+
 }
 
 #endif
diff --git a/libs/utils/Unicode.cpp b/libs/utils/Unicode.cpp
index 41cbf035e..a66e3bbbb 100644
--- a/libs/utils/Unicode.cpp
+++ b/libs/utils/Unicode.cpp
@@ -573,4 +573,34 @@ void utf8_to_utf16(const uint8_t* u8str, size_t u8len, char16_t* u16str) {
     *end = 0;
 }
 
+char16_t* utf8_to_utf16_n(const uint8_t* src, size_t srcLen, char16_t* dst, size_t dstLen) {
+    // Decodes UTF-8 src[0..srcLen) into at most dstLen UTF-16 units at dst, without a
+    // terminator, and returns a pointer just past the last unit kept.
+    const uint8_t* const u8end = src + srcLen;
+    const uint8_t* u8cur = src;
+    // Typed like dst: where char16_t is a distinct type, char16_t* is not a uint16_t*.
+    const char16_t* const u16end = dst + dstLen;
+    char16_t* u16cur = dst;

+    while (u8cur < u8end && u16cur < u16end) {
+        size_t u8len = utf8_codepoint_len(*u8cur);
+        // Truncated sequence: fewer than u8len bytes remain, so do not decode past u8end.
+        if (u8len > (size_t) (u8end - u8cur)) { break; }
+        uint32_t codepoint = utf8_to_utf32_codepoint(u8cur, u8len);

+        if (codepoint <= 0xFFFF) {
+            // Fits in a single UTF-16 unit.
+            *u16cur++ = (char16_t) codepoint;
+        } else {
+            // Needs a surrogate pair: emit both halves or neither, never a lone surrogate.
+            if (u16end - u16cur < 2) { break; }
+            codepoint -= 0x10000;
+            *u16cur++ = (char16_t) ((codepoint >> 10) + 0xD800);
+            *u16cur++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
+        }
+        u8cur += u8len;
+    }
+    return u16cur;
+}
+
 }
author	Dianne Hackborn <hackbod@google.com>	2013-07-31 16:04:39 -0700
committer	Dianne Hackborn <hackbod@google.com>	2013-07-31 16:04:39 -0700
commit	0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0 (patch)
tree	73f787e480f5c56f78652b3455ad6f40664e8d7b
parent	d43eae5e399a1e6464def8227083988c0a090dee (diff)
download	system_core-0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0.tar.gz system_core-0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0.tar.bz2 system_core-0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0.zip