diff options
author | Dianne Hackborn <hackbod@google.com> | 2013-07-31 16:04:39 -0700 |
---|---|---|
committer | Dianne Hackborn <hackbod@google.com> | 2013-07-31 16:04:39 -0700 |
commit | 0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0 (patch) | |
tree | 73f787e480f5c56f78652b3455ad6f40664e8d7b | |
parent | d43eae5e399a1e6464def8227083988c0a090dee (diff) | |
download | system_core-0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0.tar.gz system_core-0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0.tar.bz2 system_core-0f10d0abf3e6f6b5631c091256f8b4e7a20a33d0.zip |
Add a new utf8-to-utf16 conversion function.
Change-Id: I957c22fb219596ca4239db7a169473d3894b09eb
-rw-r--r-- | include/utils/Unicode.h | 11 | ||||
-rw-r--r-- | libs/utils/Unicode.cpp | 40 |
2 files changed, 51 insertions, 0 deletions
diff --git a/include/utils/Unicode.h b/include/utils/Unicode.h
index 927353377..c8c87c326 100644
--- a/include/utils/Unicode.h
+++ b/include/utils/Unicode.h
@@ -163,6 +163,17 @@ char16_t* utf8_to_utf16_no_null_terminator(const uint8_t* src, size_t srcLen, ch
  */
 void utf8_to_utf16(const uint8_t* src, size_t srcLen, char16_t* dst);
 
+/**
+ * Like utf8_to_utf16_no_null_terminator, but you can supply a maximum length of the
+ * decoded string. At most dstLen char16_t units are stored in dst, and no null
+ * terminator is written. Returns a pointer just past the last unit of the result.
+ * If the decoded string is longer than dstLen, that pointer is dst + dstLen, or
+ * dst + dstLen - 1 when the character at the cut needed a surrogate pair that did
+ * not fit (a pair is never split). Decoding also stops early, without reading past
+ * src + srcLen, if the input ends in the middle of a multi-byte sequence.
+ */
+char16_t* utf8_to_utf16_n(const uint8_t* src, size_t srcLen, char16_t* dst, size_t dstLen);
+
 }
 
 #endif
diff --git a/libs/utils/Unicode.cpp b/libs/utils/Unicode.cpp
index 41cbf035e..a66e3bbbb 100644
--- a/libs/utils/Unicode.cpp
+++ b/libs/utils/Unicode.cpp
@@ -573,4 +573,44 @@ void utf8_to_utf16(const uint8_t* u8str, size_t u8len, char16_t* u16str) {
     *end = 0;
 }
 
+// Bounded variant of the UTF-8 to UTF-16 conversion: decodes src into dst, keeping at
+// most dstLen char16_t units and writing no null terminator. Returns a pointer just
+// past the last unit of the result.
+char16_t* utf8_to_utf16_n(const uint8_t* src, size_t srcLen, char16_t* dst, size_t dstLen) {
+    const uint8_t* const u8end = src + srcLen;
+    const uint8_t* u8cur = src;
+    // Same element type as dst, so the comparisons below are between like pointers.
+    const char16_t* const u16end = dst + dstLen;
+    char16_t* u16cur = dst;
+
+    while (u8cur < u8end && u16cur < u16end) {
+        size_t u8len = utf8_codepoint_len(*u8cur);
+        if (u8len > (size_t) (u8end - u8cur)) {
+            // utf8_codepoint_len reports a sequence longer than what src has left; stop
+            // here rather than hand the decoder a length that runs past the input.
+            break;
+        }
+        uint32_t codepoint = utf8_to_utf32_codepoint(u8cur, u8len);
+
+        // Convert the UTF32 codepoint to one or more UTF16 codepoints
+        if (codepoint <= 0xFFFF) {
+            // Single UTF16 character
+            *u16cur++ = (char16_t) codepoint;
+        } else {
+            // Multiple UTF16 characters with surrogates
+            codepoint = codepoint - 0x10000;
+            *u16cur++ = (char16_t) ((codepoint >> 10) + 0xD800);
+            if (u16cur >= u16end) {
+                // Ooops... not enough room for this surrogate pair. Back up over the
+                // high surrogate just stored so the result does not end in half a pair.
+                return u16cur-1;
+            }
+            *u16cur++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
+        }
+
+        u8cur += u8len;
+    }
+    return u16cur;
+}
+
 }